File size: 11,989 Bytes
9244b7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
import os


def generate_consumption_code(model_type: str, run_id: str, target_column: str) -> str:
    """
    Generates a Python code snippet to load and run predictions with the trained model.
    Supports: autogluon, flaml, h2o, tpot, pycaret, lale.

    Args:
        model_type: Framework key selecting the snippet template. Any value outside
            the supported list yields a short stub pointing at
            ``mlflow.artifacts.download_artifacts``.
        run_id: MLflow run ID; interpolated verbatim into the generated source.
        target_column: Kept for interface compatibility; not used by the templates.

    Returns:
        The source text of a standalone script. The prediction steps are emitted
        as comments for the user to adapt.

    Notes:
        The run's ``task_type`` param is looked up best-effort and defaults to
        ``"Classification"`` when MLflow is unavailable or the run cannot be read.
        The flaml, pycaret and lale snippets deserialize pickle/joblib artifacts,
        so they should only be pointed at a trusted tracking server.
    """
    try:
        # This module does not import mlflow at top level, so bring the client
        # into scope here; with a bare `mlflow` name the lookup dies with
        # NameError and the task type is never read from the run.
        from mlflow.tracking import MlflowClient

        run = MlflowClient().get_run(run_id)
        task_type = run.data.params.get("task_type", "Classification")
    except Exception:
        # Deliberate best-effort: a missing mlflow install, an unreachable
        # tracking server or an unknown run must not block snippet generation.
        task_type = "Classification"

    base_code = f"""# Sample code to consume the trained model
# Run ID: {run_id}
# Model Type: {model_type}
# Task Type: {task_type}

import os
import pandas as pd
import mlflow
"""

    if model_type == "autogluon":
        return base_code + f"""
from autogluon.tabular import TabularPredictor

# 1. Download model from MLflow
local_path = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")

# 2. Load model
predictor = TabularPredictor.load(local_path)

# 3. Predict
# data = pd.read_csv("your_data.csv")
# predictions = predictor.predict(data)
# print(predictions)
"""

    elif model_type == "flaml":
        # The generated search stops at the first .pkl: the trailing
        # `if model is not None: break` leaves the os.walk loop too.
        return base_code + f"""
import pickle

# 1. Download model from MLflow
local_path = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")

# 2. Load the .pkl file
model = None
for root, dirs, files in os.walk(local_path):
    for f in files:
        if f.endswith(".pkl"):
            with open(os.path.join(root, f), "rb") as fh:
                model = pickle.load(fh)
            break
    if model is not None:
        break

if model is None:
    raise FileNotFoundError("Model .pkl not found in artifacts.")

# 3. Predict
# data = pd.read_csv("your_data.csv")
# predictions = model.predict(data)
# print(predictions)
"""

    elif model_type == "h2o":
        # Same first-match search as the other snippets, plus an explicit
        # not-found error so a missing artifact fails at load time.
        return base_code + f"""
import h2o

# 1. Initialize H2O
h2o.init()

# 2. Download model from MLflow
local_path = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")

# 3. Load the H2O model
model = None
for root, dirs, files in os.walk(local_path):
    for f in files:
        if f.endswith(".zip") or "." not in f:
            model = h2o.load_model(os.path.join(root, f))
            break
    if model is not None:
        break

if model is None:
    raise FileNotFoundError("H2O model not found in artifacts.")

# 4. Predict
# h2o_frame = h2o.H2OFrame(pd.read_csv("your_data.csv"))
# predictions = model.predict(h2o_frame)
# print(predictions.as_data_frame())
"""

    elif model_type == "tpot":
        return base_code + f"""
import mlflow.sklearn

# 1. Load model directly from MLflow
model = mlflow.sklearn.load_model("runs:/{run_id}/model")

# 2. Predict
# data = pd.read_csv("your_data.csv")
# predictions = model.predict(data)
# print(predictions)
"""

    elif model_type == "pycaret":
        if task_type == "Regression":
            pc_module = "pycaret.regression"
        elif task_type == "Time Series Forecasting":
            pc_module = "pycaret.time_series"
        else:
            pc_module = "pycaret.classification"

        # load_model() is handed an extension-less path. os.path.splitext drops
        # only the trailing ".pkl", whereas str.replace would also rewrite any
        # directory name that happens to contain ".pkl".
        return base_code + f"""
import joblib
from {pc_module} import load_model, predict_model

# 1. Download model artifact from MLflow
local_path = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")

# 2. Find and load the PyCaret .pkl file
model_path = None
for root, dirs, files in os.walk(local_path):
    for f in files:
        if f.endswith(".pkl"):
            model_path = os.path.splitext(os.path.join(root, f))[0]
            break
    if model_path is not None:
        break

if model_path is None:
    raise FileNotFoundError("PyCaret model .pkl not found in artifacts.")

model = load_model(model_path)

# 3. Predict
# data = pd.read_csv("your_data.csv")  # For classification/regression, must NOT contain target column
# predictions = predict_model(model, data=data)
# print(predictions)
"""

    elif model_type == "lale":
        return base_code + f"""
import joblib
import numpy as np
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder

# 1. Download model artifact from MLflow
local_path = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")

# 2. Find and load the Lale joblib bundle
bundle = None
for root, dirs, files in os.walk(local_path):
    for f in files:
        if f.endswith(".pkl"):
            bundle = joblib.load(os.path.join(root, f))
            break
    if bundle is not None:
        break

if bundle is None:
    raise FileNotFoundError("Lale model .pkl not found in artifacts.")

model        = bundle["model"]
col_encoders = bundle.get("col_encoders", {{}})
y_encoder    = bundle.get("y_encoder", None)

# 3. Preprocess and Predict
# data = pd.read_csv("your_data.csv")  # must NOT contain target column
#
# for col, enc in col_encoders.items():
#     data[col] = enc.transform(data[[col]]).ravel()
#
# raw_preds = model.predict(data.values)
#
# if y_encoder is not None:
#     predictions = y_encoder.inverse_transform(raw_preds)
# else:
#     predictions = raw_preds
#
# print(predictions)
"""

    else:
        return base_code + f"""
# Code generation for '{model_type}' is not explicitly implemented.
# Try loading via: mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")
"""


def _load_code_for_deploy(model_type: str, run_id: str) -> str:
    """Returns the model-loading block used in the FastAPI main.py."""
    if model_type == "autogluon":
        return f"""
from autogluon.tabular import TabularPredictor
import mlflow
_local = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")
model = TabularPredictor.load(_local)

def _predict(df):
    return model.predict(df).tolist()
"""
    elif model_type == "flaml":
        return f"""
import pickle, os, mlflow
_local = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")
model = None
for root, _, files in os.walk(_local):
    for f in files:
        if f.endswith(".pkl"):
            with open(os.path.join(root, f), "rb") as fh:
                model = pickle.load(fh)
            break
if model is None:
    raise FileNotFoundError("FLAML model not found.")

def _predict(df):
    return model.predict(df).tolist()
"""
    elif model_type == "h2o":
        return f"""
import h2o, os, mlflow
h2o.init()
_local = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")
model = None
for root, _, files in os.walk(_local):
    for f in files:
        if f.endswith(".zip") or "." not in f:
            model = h2o.load_model(os.path.join(root, f))
            break

def _predict(df):
    hf = h2o.H2OFrame(df)
    return model.predict(hf).as_data_frame()["predict"].tolist()
"""
    elif model_type == "tpot":
        return f"""
import mlflow.sklearn
model = mlflow.sklearn.load_model("runs:/{run_id}/model")

def _predict(df):
    return model.predict(df).tolist()
"""
    elif model_type == "pycaret":
        return f"""
import os, mlflow, joblib
import pandas as pd
_local = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")

try:
    client = mlflow.tracking.MlflowClient()
    run = client.get_run("{run_id}")
    task_type = run.data.params.get("task_type", "Classification")
except Exception:
    task_type = "Classification"

if task_type == "Regression":
    from pycaret.regression import load_model, predict_model
elif task_type == "Time Series Forecasting":
    from pycaret.time_series import load_model, predict_model
else:
    from pycaret.classification import load_model, predict_model

_mpath = None
for root, _, files in os.walk(_local):
    for f in files:
        if f.endswith(".pkl"):
            _mpath = os.path.join(root, f).replace(".pkl", "")
            break
if _mpath is None:
    raise FileNotFoundError("PyCaret model not found.")
model = load_model(_mpath)

def _predict(df):
    preds = predict_model(model, data=df)
    if task_type == "Classification" and "prediction_label" in preds.columns:
        return preds["prediction_label"].tolist()
    else:
        # For regression or time series, it might return 'prediction_label' or just predictions
        if "prediction_label" in preds.columns:
            return preds["prediction_label"].tolist()
        return preds.iloc[:, 0].tolist()
"""
    elif model_type == "lale":
        return f"""
import os, mlflow, joblib
import numpy as np
_local = mlflow.artifacts.download_artifacts(run_id="{run_id}", artifact_path="model")
_bundle = None
for root, _, files in os.walk(_local):
    for f in files:
        if f.endswith(".pkl"):
            _bundle = joblib.load(os.path.join(root, f))
            break
if _bundle is None:
    raise FileNotFoundError("Lale model not found.")
_model        = _bundle["model"]
_col_encoders = _bundle.get("col_encoders", {{}})
_y_encoder    = _bundle.get("y_encoder", None)

def _predict(df):
    import pandas as _pd
    df = _pd.DataFrame(df)
    for col, enc in _col_encoders.items():
        if col in df.columns:
            df[col] = enc.transform(df[[col]]).ravel()
    raw = _model.predict(df.values)
    if _y_encoder is not None:
        return _y_encoder.inverse_transform(raw).tolist()
    return raw.tolist()
"""
    else:
        return """
model = None
def _predict(df):
    return []
"""


def generate_api_deployment(model_type: str, run_id: str, target_column: str, output_dir: str = "deploy") -> str:
    """
    Writes a ready-to-use FastAPI + Docker deployment package for the model.
    Supports: autogluon, flaml, h2o, tpot, pycaret, lale.

    Four files are created inside ``output_dir`` (which is created if missing),
    in this order: ``main.py``, ``requirements.txt``, ``Dockerfile``, ``README.md``.

    Args:
        model_type: Framework key; selects the loader block and the extra pins.
        run_id: MLflow run ID embedded in main.py and the README.
        target_column: Accepted but not used when generating the files.
        output_dir: Destination directory for the generated files.

    Returns:
        ``output_dir``, unchanged.
    """
    os.makedirs(output_dir, exist_ok=True)

    def _emit(filename, text):
        # All generated files are written as UTF-8 text directly under output_dir.
        with open(os.path.join(output_dir, filename), "w", encoding="utf-8") as handle:
            handle.write(text)

    # --- main.py: FastAPI app wrapped around the framework-specific loader ---
    loader_block = _load_code_for_deploy(model_type, run_id)
    app_source = f"""from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import pandas as pd
import os

app = FastAPI(title="AutoML Generated API - {model_type}", version="1.0")

# --- Model Loading ---
{loader_block}
# ---------------------

@app.get("/")
def health():
    return {{"status": "running", "model": "{model_type}", "run_id": "{run_id}"}}

@app.post("/predict")
def predict(payload: dict):
    try:
        if "data" in payload:
            df = pd.DataFrame(payload["data"])
        else:
            df = pd.DataFrame([payload])
        return {{"predictions": _predict(df)}}
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e))

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
"""
    _emit("main.py", app_source)

    # --- requirements.txt: shared pins followed by the framework's own pins ---
    common_pins = (
        "fastapi==0.104.1",
        "uvicorn==0.24.0",
        "pydantic==2.5.2",
        "pandas==2.1.4",
        "mlflow==2.9.2",
    )
    framework_pins = {
        "autogluon": ("autogluon==1.0.0",),
        "flaml": ("flaml==2.1.2",),
        "h2o": ("h2o==3.44.0.3",),
        "tpot": ("tpot==0.12.2", "scikit-learn==1.2.2"),
        "pycaret": ("pycaret==3.3.0", "scikit-learn==1.2.2", "scipy==1.11.4"),
        "lale": ("lale==0.9.1", "scikit-learn==1.2.2", "joblib", "hyperopt"),
    }
    # Unknown frameworks contribute no extra pins.
    all_pins = common_pins + framework_pins.get(model_type, ())
    _emit("requirements.txt", "".join(pin + "\n" for pin in all_pins))

    # --- Dockerfile: static text, identical for every framework ---
    docker_text = """FROM python:3.11-slim

WORKDIR /app

RUN apt-get update && apt-get install -y \\
    build-essential libgomp1 libgl1 python3-dev default-jre curl \\
    && rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY main.py .

EXPOSE 8000
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
"""
    _emit("Dockerfile", docker_text)

    # --- README.md: the image tag is the first eight characters of the run ID ---
    image_tag = run_id[:8]
    readme_text = f"""# API Deployment — {model_type} (Run: {run_id})

## Local
```bash
pip install -r requirements.txt
python main.py
```

## Docker
```bash
docker build -t ml-api:{image_tag} .
docker run -p 8000:8000 ml-api:{image_tag}
```

## Example request
```json
POST http://localhost:8000/predict
{{
  "data": [{{"feature1": 1.5, "feature2": "value"}}]
}}
```
"""
    _emit("README.md", readme_text)

    return output_dir