import os def generate_consumption_code(model_type: str, run_id: str, target_column: str) -> str: """ Generates a Python code snippet to load and run predictions with the trained model. Supports: autogluon, flaml, h2o, tpot, pycaret, lale. """ try: client = mlflow.tracking.MlflowClient() run = client.get_run(run_id) task_type = run.data.params.get("task_type", "Classification") except Exception: task_type = "Classification" base_code = f"""# Sample code to consume the trained model # Run ID: {run_id} # Model Type: {model_type} # Task Type: {task_type} import os import pandas as pd import mlflow """ if model_type == "autogluon": return base_code + f""" from autogluon.tabular import TabularPredictor # 1. Download model from MLflow local_path = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model") # 2. Load model predictor = TabularPredictor.load(local_path) # 3. Predict # data = pd.read_csv("your_data.csv") # predictions = predictor.predict(data) # print(predictions) """ elif model_type == "flaml": return base_code + f""" import pickle # 1. Download model from MLflow local_path = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model") # 2. Load the .pkl file model = None for root, dirs, files in os.walk(local_path): for f in files: if f.endswith(".pkl"): with open(os.path.join(root, f), "rb") as fh: model = pickle.load(fh) break if model is None: raise FileNotFoundError("Model .pkl not found in artifacts.") # 3. Predict # data = pd.read_csv("your_data.csv") # predictions = model.predict(data) # print(predictions) """ elif model_type == "h2o": return base_code + f""" import h2o # 1. Initialize H2O h2o.init() # 2. Download model from MLflow local_path = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model") # 3. Load the H2O model model = None for root, dirs, files in os.walk(local_path): for f in files: if f.endswith(".zip") or "." not in f: model = h2o.load_model(os.path.join(root, f)) break # 4. Predict # h2o_frame = h2o.H2OFrame(pd.read_csv("your_data.csv")) # predictions = model.predict(h2o_frame) # print(predictions.as_data_frame()) """ elif model_type == "tpot": return base_code + f""" import mlflow.sklearn # 1. Load model directly from MLflow model = mlflow.sklearn.load_model("runs:/{run_id}/model") # 2. Predict # data = pd.read_csv("your_data.csv") # predictions = model.predict(data) # print(predictions) """ elif model_type == "pycaret": if task_type == "Regression": pc_module = "pycaret.regression" elif task_type == "Time Series Forecasting": pc_module = "pycaret.time_series" else: pc_module = "pycaret.classification" return base_code + f""" import joblib from {pc_module} import load_model, predict_model # 1. Download model artifact from MLflow local_path = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model") # 2. Find and load the PyCaret .pkl file model_path = None for root, dirs, files in os.walk(local_path): for f in files: if f.endswith(".pkl"): model_path = os.path.join(root, f).replace(".pkl", "") break if model_path is None: raise FileNotFoundError("PyCaret model .pkl not found in artifacts.") model = load_model(model_path) # 3. Predict # data = pd.read_csv("your_data.csv") # For classification/regression, must NOT contain target column # predictions = predict_model(model, data=data) # print(predictions) """ elif model_type == "lale": return base_code + f""" import joblib import numpy as np from sklearn.preprocessing import OrdinalEncoder, LabelEncoder # 1. Download model artifact from MLflow local_path = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model") # 2. Find and load the Lale joblib bundle bundle = None for root, dirs, files in os.walk(local_path): for f in files: if f.endswith(".pkl"): bundle = joblib.load(os.path.join(root, f)) break if bundle is None: raise FileNotFoundError("Lale model .pkl not found in artifacts.") model = bundle["model"] col_encoders = bundle.get("col_encoders", {{}}) y_encoder = bundle.get("y_encoder", None) # 3. Preprocess and Predict # data = pd.read_csv("your_data.csv") # must NOT contain target column # # for col, enc in col_encoders.items(): # data[col] = enc.transform(data[[col]]).ravel() # # raw_preds = model.predict(data.values) # # if y_encoder is not None: # predictions = y_encoder.inverse_transform(raw_preds) # else: # predictions = raw_preds # # print(predictions) """ else: return base_code + f""" # Code generation for '{model_type}' is not explicitly implemented. # Try loading via: mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model") """ def _load_code_for_deploy(model_type: str, run_id: str) -> str: """Returns the model-loading block used in the FastAPI main.py.""" if model_type == "autogluon": return f""" from autogluon.tabular import TabularPredictor import mlflow _local = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model") model = TabularPredictor.load(_local) def _predict(df): return model.predict(df).tolist() """ elif model_type == "flaml": return f""" import pickle, os, mlflow _local = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model") model = None for root, _, files in os.walk(_local): for f in files: if f.endswith(".pkl"): with open(os.path.join(root, f), "rb") as fh: model = pickle.load(fh) break if model is None: raise FileNotFoundError("FLAML model not found.") def _predict(df): return model.predict(df).tolist() """ elif model_type == "h2o": return f""" import h2o, os, mlflow h2o.init() _local = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model") model = None for root, _, files in os.walk(_local): for f in files: if f.endswith(".zip") or "." not in f: model = h2o.load_model(os.path.join(root, f)) break def _predict(df): hf = h2o.H2OFrame(df) return model.predict(hf).as_data_frame()["predict"].tolist() """ elif model_type == "tpot": return f""" import mlflow.sklearn model = mlflow.sklearn.load_model("runs:/{run_id}/model") def _predict(df): return model.predict(df).tolist() """ elif model_type == "pycaret": return f""" import os, mlflow, joblib import pandas as pd _local = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model") try: client = mlflow.tracking.MlflowClient() run = client.get_run("{run_id}") task_type = run.data.params.get("task_type", "Classification") except Exception: task_type = "Classification" if task_type == "Regression": from pycaret.regression import load_model, predict_model elif task_type == "Time Series Forecasting": from pycaret.time_series import load_model, predict_model else: from pycaret.classification import load_model, predict_model _mpath = None for root, _, files in os.walk(_local): for f in files: if f.endswith(".pkl"): _mpath = os.path.join(root, f).replace(".pkl", "") break if _mpath is None: raise FileNotFoundError("PyCaret model not found.") model = load_model(_mpath) def _predict(df): preds = predict_model(model, data=df) if task_type == "Classification" and "prediction_label" in preds.columns: return preds["prediction_label"].tolist() else: # For regression or time series, it might return 'prediction_label' or just predictions if "prediction_label" in preds.columns: return preds["prediction_label"].tolist() return preds.iloc[:, 0].tolist() """ elif model_type == "lale": return f""" import os, mlflow, joblib import numpy as np _local = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model") _bundle = None for root, _, files in os.walk(_local): for f in files: if f.endswith(".pkl"): _bundle = joblib.load(os.path.join(root, f)) break if _bundle is None: raise FileNotFoundError("Lale model not found.") _model = _bundle["model"] _col_encoders = _bundle.get("col_encoders", {{}}) _y_encoder = _bundle.get("y_encoder", None) def _predict(df): import pandas as _pd df = _pd.DataFrame(df) for col, enc in _col_encoders.items(): if col in df.columns: df[col] = enc.transform(df[[col]]).ravel() raw = _model.predict(df.values) if _y_encoder is not None: return _y_encoder.inverse_transform(raw).tolist() return raw.tolist() """ else: return """ model = None def _predict(df): return [] """ def generate_api_deployment(model_type: str, run_id: str, target_column: str, output_dir: str = "deploy") -> str: """ Generates a ready-to-use FastAPI + Docker deployment package for the model. Supports: autogluon, flaml, h2o, tpot, pycaret, lale. """ os.makedirs(output_dir, exist_ok=True) load_code = _load_code_for_deploy(model_type, run_id) main_py = f"""from fastapi import FastAPI, HTTPException from pydantic import BaseModel import pandas as pd import os app = FastAPI(title="AutoML Generated API - {model_type}", version="1.0") # --- Model Loading --- {load_code} # --------------------- @app.get("/") def health(): return {{"status": "running", "model": "{model_type}", "run_id": "{run_id}"}} @app.post("/predict") def predict(payload: dict): try: if "data" in payload: df = pd.DataFrame(payload["data"]) else: df = pd.DataFrame([payload]) return {{"predictions": _predict(df)}} except Exception as e: raise HTTPException(status_code=400, detail=str(e)) if __name__ == "__main__": import uvicorn uvicorn.run(app, host="0.0.0.0", port=8000) """ with open(os.path.join(output_dir, "main.py"), "w", encoding="utf-8") as f: f.write(main_py) # requirements.txt base_reqs = """fastapi==0.104.1 uvicorn==0.24.0 pydantic==2.5.2 pandas==2.1.4 mlflow==2.9.2 """ extra = { "autogluon": "autogluon==1.0.0\n", "flaml": "flaml==2.1.2\n", "h2o": "h2o==3.44.0.3\n", "tpot": "tpot==0.12.2\nscikit-learn==1.2.2\n", "pycaret": "pycaret==3.3.0\nscikit-learn==1.2.2\nscipy==1.11.4\n", "lale": "lale==0.9.1\nscikit-learn==1.2.2\njoblib\nhyperopt\n", } reqs = base_reqs + extra.get(model_type, "") with open(os.path.join(output_dir, "requirements.txt"), "w", encoding="utf-8") as f: f.write(reqs) # Dockerfile dockerfile = f"""FROM python:3.11-slim WORKDIR /app RUN apt-get update && apt-get install -y \\ build-essential libgomp1 libgl1 python3-dev default-jre curl \\ && rm -rf /var/lib/apt/lists/* COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt COPY main.py . EXPOSE 8000 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] """ with open(os.path.join(output_dir, "Dockerfile"), "w", encoding="utf-8") as f: f.write(dockerfile) # README readme = f"""# API Deployment — {model_type} (Run: {run_id}) ## Local ```bash pip install -r requirements.txt python main.py ``` ## Docker ```bash docker build -t ml-api:{run_id[:8]} . docker run -p 8000:8000 ml-api:{run_id[:8]} ``` ## Example request ```json POST http://localhost:8000/predict {{ "data": [{{"feature1": 1.5, "feature2": "value"}}] }} ``` """ with open(os.path.join(output_dir, "README.md"), "w", encoding="utf-8") as f: f.write(readme) return output_dir