# Multi-AutoML-Interface / src/code_gen_utils.py
# PedroM2626's picture
# Add ONNX export utilities, pipeline parser, and PyCaret integration
# 9244b7e
import os
def generate_consumption_code(model_type: str, run_id: str, target_column: str) -> str:
    """
    Generates a Python code snippet to load and run predictions with the trained model.
    Supports: autogluon, flaml, h2o, tpot, pycaret, lale.

    Args:
        model_type: Framework key selecting the snippet. Any other value yields
            a generic hint instead of loading code.
        run_id: MLflow run ID interpolated into the snippet.
        target_column: Not referenced by this function; kept so existing
            callers keep working.

    Returns:
        The snippet source as a string: a shared header followed by the
        framework-specific body.
    """
    try:
        # Imported here so the lookup really runs when mlflow is installed,
        # while this module still imports cleanly when it is not.
        import mlflow

        client = mlflow.tracking.MlflowClient()
        run = client.get_run(run_id)
        task_type = run.data.params.get("task_type", "Classification")
    except Exception:
        # Best effort: mlflow missing, tracking store unreachable or unknown
        # run all fall back to the default task type.
        task_type = "Classification"

    base_code = f"""# Sample code to consume the trained model
# Run ID: {run_id}
# Model Type: {model_type}
# Task Type: {task_type}
import os
import pandas as pd
import mlflow
"""

    if model_type == "autogluon":
        return base_code + f"""
from autogluon.tabular import TabularPredictor
# 1. Download model from MLflow
local_path = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")
# 2. Load model
predictor = TabularPredictor.load(local_path)
# 3. Predict
# data = pd.read_csv("your_data.csv")
# predictions = predictor.predict(data)
# print(predictions)
"""

    if model_type == "flaml":
        # NOTE: the emitted snippet unpickles the downloaded artifact, so it
        # should only be pointed at a trusted MLflow store.
        return base_code + f"""
import pickle
# 1. Download model from MLflow
local_path = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")
# 2. Load the .pkl file
model = None
for root, dirs, files in os.walk(local_path):
    for f in files:
        if f.endswith(".pkl"):
            with open(os.path.join(root, f), "rb") as fh:
                model = pickle.load(fh)
            break
if model is None:
    raise FileNotFoundError("Model .pkl not found in artifacts.")
# 3. Predict
# data = pd.read_csv("your_data.csv")
# predictions = model.predict(data)
# print(predictions)
"""

    if model_type == "h2o":
        return base_code + f"""
import h2o
# 1. Initialize H2O
h2o.init()
# 2. Download model from MLflow
local_path = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")
# 3. Load the H2O model
model = None
for root, dirs, files in os.walk(local_path):
    for f in files:
        if f.endswith(".zip") or "." not in f:
            model = h2o.load_model(os.path.join(root, f))
            break
if model is None:
    raise FileNotFoundError("H2O model not found in artifacts.")
# 4. Predict
# h2o_frame = h2o.H2OFrame(pd.read_csv("your_data.csv"))
# predictions = model.predict(h2o_frame)
# print(predictions.as_data_frame())
"""

    if model_type == "tpot":
        return base_code + f"""
import mlflow.sklearn
# 1. Load model directly from MLflow
model = mlflow.sklearn.load_model("runs:/{run_id}/model")
# 2. Predict
# data = pd.read_csv("your_data.csv")
# predictions = model.predict(data)
# print(predictions)
"""

    if model_type == "pycaret":
        # PyCaret exposes load_model/predict_model per task module.
        if task_type == "Regression":
            pc_module = "pycaret.regression"
        elif task_type == "Time Series Forecasting":
            pc_module = "pycaret.time_series"
        else:
            pc_module = "pycaret.classification"
        # The snippet strips only the trailing ".pkl" (splitext) because the
        # path is handed to load_model without its extension; a blanket
        # str.replace would also mangle any directory containing ".pkl".
        return base_code + f"""
import joblib
from {pc_module} import load_model, predict_model
# 1. Download model artifact from MLflow
local_path = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")
# 2. Find and load the PyCaret .pkl file
model_path = None
for root, dirs, files in os.walk(local_path):
    for f in files:
        if f.endswith(".pkl"):
            model_path = os.path.splitext(os.path.join(root, f))[0]
            break
if model_path is None:
    raise FileNotFoundError("PyCaret model .pkl not found in artifacts.")
model = load_model(model_path)
# 3. Predict
# data = pd.read_csv("your_data.csv") # For classification/regression, must NOT contain target column
# predictions = predict_model(model, data=data)
# print(predictions)
"""

    if model_type == "lale":
        return base_code + f"""
import joblib
import numpy as np
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder
# 1. Download model artifact from MLflow
local_path = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")
# 2. Find and load the Lale joblib bundle
bundle = None
for root, dirs, files in os.walk(local_path):
    for f in files:
        if f.endswith(".pkl"):
            bundle = joblib.load(os.path.join(root, f))
            break
if bundle is None:
    raise FileNotFoundError("Lale model .pkl not found in artifacts.")
model = bundle["model"]
col_encoders = bundle.get("col_encoders", {{}})
y_encoder = bundle.get("y_encoder", None)
# 3. Preprocess and Predict
# data = pd.read_csv("your_data.csv") # must NOT contain target column
#
# for col, enc in col_encoders.items():
#     data[col] = enc.transform(data[[col]]).ravel()
#
# raw_preds = model.predict(data.values)
#
# if y_encoder is not None:
#     predictions = y_encoder.inverse_transform(raw_preds)
# else:
#     predictions = raw_preds
#
# print(predictions)
"""

    # Unknown framework: emit the header plus a pointer to the raw artifacts.
    return base_code + f"""
# Code generation for '{model_type}' is not explicitly implemented.
# Try loading via: mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")
"""
def _load_code_for_deploy(model_type: str, run_id: str) -> str:
"""Returns the model-loading block used in the FastAPI main.py."""
if model_type == "autogluon":
return f"""
from autogluon.tabular import TabularPredictor
import mlflow
_local = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")
model = TabularPredictor.load(_local)
def _predict(df):
return model.predict(df).tolist()
"""
elif model_type == "flaml":
return f"""
import pickle, os, mlflow
_local = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")
model = None
for root, _, files in os.walk(_local):
for f in files:
if f.endswith(".pkl"):
with open(os.path.join(root, f), "rb") as fh:
model = pickle.load(fh)
break
if model is None:
raise FileNotFoundError("FLAML model not found.")
def _predict(df):
return model.predict(df).tolist()
"""
elif model_type == "h2o":
return f"""
import h2o, os, mlflow
h2o.init()
_local = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")
model = None
for root, _, files in os.walk(_local):
for f in files:
if f.endswith(".zip") or "." not in f:
model = h2o.load_model(os.path.join(root, f))
break
def _predict(df):
hf = h2o.H2OFrame(df)
return model.predict(hf).as_data_frame()["predict"].tolist()
"""
elif model_type == "tpot":
return f"""
import mlflow.sklearn
model = mlflow.sklearn.load_model("runs:/{run_id}/model")
def _predict(df):
return model.predict(df).tolist()
"""
elif model_type == "pycaret":
return f"""
import os, mlflow, joblib
import pandas as pd
_local = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")
try:
client = mlflow.tracking.MlflowClient()
run = client.get_run("{run_id}")
task_type = run.data.params.get("task_type", "Classification")
except Exception:
task_type = "Classification"
if task_type == "Regression":
from pycaret.regression import load_model, predict_model
elif task_type == "Time Series Forecasting":
from pycaret.time_series import load_model, predict_model
else:
from pycaret.classification import load_model, predict_model
_mpath = None
for root, _, files in os.walk(_local):
for f in files:
if f.endswith(".pkl"):
_mpath = os.path.join(root, f).replace(".pkl", "")
break
if _mpath is None:
raise FileNotFoundError("PyCaret model not found.")
model = load_model(_mpath)
def _predict(df):
preds = predict_model(model, data=df)
if task_type == "Classification" and "prediction_label" in preds.columns:
return preds["prediction_label"].tolist()
else:
# For regression or time series, it might return 'prediction_label' or just predictions
if "prediction_label" in preds.columns:
return preds["prediction_label"].tolist()
return preds.iloc[:, 0].tolist()
"""
elif model_type == "lale":
return f"""
import os, mlflow, joblib
import numpy as np
_local = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")
_bundle = None
for root, _, files in os.walk(_local):
for f in files:
if f.endswith(".pkl"):
_bundle = joblib.load(os.path.join(root, f))
break
if _bundle is None:
raise FileNotFoundError("Lale model not found.")
_model = _bundle["model"]
_col_encoders = _bundle.get("col_encoders", {{}})
_y_encoder = _bundle.get("y_encoder", None)
def _predict(df):
import pandas as _pd
df = _pd.DataFrame(df)
for col, enc in _col_encoders.items():
if col in df.columns:
df[col] = enc.transform(df[[col]]).ravel()
raw = _model.predict(df.values)
if _y_encoder is not None:
return _y_encoder.inverse_transform(raw).tolist()
return raw.tolist()
"""
else:
return """
model = None
def _predict(df):
return []
"""
def generate_api_deployment(model_type: str, run_id: str, target_column: str, output_dir: str = "deploy") -> str:
    """
    Generates a ready-to-use FastAPI + Docker deployment package for the model.
    Supports: autogluon, flaml, h2o, tpot, pycaret, lale.

    Writes four files into ``output_dir`` (created if missing): ``main.py``,
    ``requirements.txt``, ``Dockerfile`` and ``README.md``. Existing files
    with those names are overwritten.

    Args:
        model_type: Framework key; selects the loader block embedded in
            ``main.py`` and the extra pinned requirements. Unlisted values
            get only the base requirements.
        run_id: MLflow run ID embedded in the generated files; its first
            eight characters are used as the Docker image tag in the README.
        target_column: Not referenced by this function; kept so existing
            callers keep working.
        output_dir: Directory that receives the generated files.

    Returns:
        ``output_dir``, unchanged.
    """
    os.makedirs(output_dir, exist_ok=True)
    load_code = _load_code_for_deploy(model_type, run_id)

    # The template is itself Python source, so its indentation is significant.
    # Doubled braces are literal braces in the generated file.
    main_py = f"""from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import pandas as pd
import os
app = FastAPI(title="AutoML Generated API - {model_type}", version="1.0")
# --- Model Loading ---
{load_code}
# ---------------------
@app.get("/")
def health():
    return {{"status": "running", "model": "{model_type}", "run_id": "{run_id}"}}
@app.post("/predict")
def predict(payload: dict):
    try:
        if "data" in payload:
            df = pd.DataFrame(payload["data"])
        else:
            df = pd.DataFrame([payload])
        return {{"predictions": _predict(df)}}
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
"""

    # requirements.txt: shared pins plus the framework-specific ones.
    base_reqs = """fastapi==0.104.1
uvicorn==0.24.0
pydantic==2.5.2
pandas==2.1.4
mlflow==2.9.2
"""
    extra = {
        "autogluon": "autogluon==1.0.0\n",
        "flaml": "flaml==2.1.2\n",
        "h2o": "h2o==3.44.0.3\n",
        "tpot": "tpot==0.12.2\nscikit-learn==1.2.2\n",
        "pycaret": "pycaret==3.3.0\nscikit-learn==1.2.2\nscipy==1.11.4\n",
        "lale": "lale==0.9.1\nscikit-learn==1.2.2\njoblib\nhyperopt\n",
    }
    reqs = base_reqs + extra.get(model_type, "")

    # Dockerfile: no placeholders, so a plain string; "\\" emits the
    # single backslash used for shell line continuation.
    dockerfile = """FROM python:3.11-slim
WORKDIR /app
RUN apt-get update && apt-get install -y \\
build-essential libgomp1 libgl1 python3-dev default-jre curl \\
&& rm -rf /var/lib/apt/lists/*
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY main.py .
EXPOSE 8000
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
"""

    # README
    readme = f"""# API Deployment — {model_type} (Run: {run_id})
## Local
```bash
pip install -r requirements.txt
python main.py
```
## Docker
```bash
docker build -t ml-api:{run_id[:8]} .
docker run -p 8000:8000 ml-api:{run_id[:8]}
```
## Example request
```json
POST http://localhost:8000/predict
{{
"data": [{{"feature1": 1.5, "feature2": "value"}}]
}}
```
"""

    # Write every artifact the same way, in a fixed order.
    generated_files = {
        "main.py": main_py,
        "requirements.txt": reqs,
        "Dockerfile": dockerfile,
        "README.md": readme,
    }
    for filename, content in generated_files.items():
        with open(os.path.join(output_dir, filename), "w", encoding="utf-8") as f:
            f.write(content)

    return output_dir