Spaces:
Sleeping
Sleeping
Priyansh Saxena committed on
Commit ·
9439512
0
Parent(s):
deploy: production backend code for HF Space
Browse files- .gitignore +2 -0
- Dockerfile +17 -0
- __init__.py +1 -0
- app.py +131 -0
- chart_generator.py +187 -0
- data/readme +1 -0
- data/sample_data.csv +5 -0
- data/train_data.csv +100 -0
- data_processor.py +64 -0
- deploy_backend.ps1 +16 -0
- fine-tuned-bart-large/README.md +60 -0
- fine-tuned-bart-large/config.json +74 -0
- fine-tuned-bart-large/generation_config.json +13 -0
- fine-tuned-bart-large/merges.txt +0 -0
- fine-tuned-bart-large/special_tokens_map.json +15 -0
- fine-tuned-bart-large/tokenizer.json +0 -0
- fine-tuned-bart-large/tokenizer_config.json +57 -0
- fine-tuned-bart-large/training_args.bin +0 -0
- fine-tuned-bart-large/vocab.json +0 -0
- image_verifier.py +34 -0
- llm_agent.py +232 -0
- requirements.txt +76 -0
- start.sh +8 -0
- tracked_files.txt +0 -0
- train_model.py +64 -0
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
__pycache__
|
Dockerfile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim

WORKDIR /app

# Install dependencies first so this (slow) layer is cached and only rebuilt
# when requirements.txt changes — the original copied the whole tree before
# installing, invalidating the pip layer on every source edit.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Application code (changes here no longer invalidate the dependency layer).
COPY . .

RUN mkdir -p /app/data/uploads /app/static/images

# Keep Hugging Face model caches inside /app where the container can write.
ENV TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers
ENV HF_HOME=/app/.cache/huggingface

# HF Spaces routes traffic to port 7860.
EXPOSE 7860

CMD ["python", "app.py"]
|
__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# backend/__init__.py
|
app.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, request, jsonify, send_from_directory
from flask_cors import CORS
from llm_agent import LLM_Agent
from data_processor import DataProcessor
import os
import logging
import time
from dotenv import load_dotenv
from werkzeug.utils import secure_filename

# Pull API keys / settings from a local .env file (no-op when absent).
load_dotenv()

logging.basicConfig(level=logging.INFO)
# Third-party libs are chatty at INFO; keep the app log readable.
logging.getLogger('matplotlib').setLevel(logging.WARNING)
logging.getLogger('PIL').setLevel(logging.WARNING)
logging.getLogger('plotly').setLevel(logging.WARNING)

BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Static assets (generated chart PNGs) live one level above this package.
app = Flask(__name__, static_folder=os.path.join(BASE_DIR, '..', 'static'))

# Only the deployed frontend and local dev servers may call the API.
CORS(app, origins=[
    "https://llm-integrated-excel-plotter-app.vercel.app",
    "http://localhost:8080",
    "http://localhost:3000",
], supports_credentials=False)

# Single shared agent instance; loads models once at startup.
agent = LLM_Agent()

UPLOAD_FOLDER = os.path.join(BASE_DIR, '..', 'data', 'uploads')
ALLOWED_EXTENSIONS = {'csv', 'xls', 'xlsx'}
MAX_UPLOAD_BYTES = 10 * 1024 * 1024 # 10 MB

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# Flask rejects larger request bodies with a 413 (handled below).
app.config['MAX_CONTENT_LENGTH'] = MAX_UPLOAD_BYTES

os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def allowed_file(filename):
    """Return True when *filename* has an extension in ALLOWED_EXTENSIONS (case-insensitive)."""
    _, sep, extension = filename.rpartition('.')
    return sep == '.' and extension.lower() in ALLOWED_EXTENSIONS
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
@app.route('/')
def index():
    """Health/landing endpoint listing the available API routes."""
    payload = {
        "status": "ok",
        "message": "AI Data Visualization API",
        "endpoints": ["/plot", "/upload", "/stats", "/models"],
    }
    return jsonify(payload)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
@app.route('/models', methods=['GET'])
def models():
    """List the selectable LLM backends and the default choice."""
    catalog = [
        {"id": "qwen", "name": "Qwen2.5-1.5B", "provider": "Local (transformers)", "free": True},
        {"id": "bart", "name": "BART (fine-tuned)", "provider": "Local (transformers)", "free": True},
        {"id": "gemini", "name": "Gemini 2.0 Flash", "provider": "Google AI (API key)", "free": False},
        {"id": "grok", "name": "Grok-3 Mini", "provider": "xAI (API key)", "free": False},
    ]
    return jsonify({"models": catalog, "default": "qwen"})
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
@app.route('/plot', methods=['POST'])
def plot():
    """Generate a chart from a natural-language query.

    Expects JSON with at least "query"; "model" is optional (defaults to
    "qwen" inside the agent). Returns the agent's result payload, or a
    JSON error with status 400 (bad request) / 500 (agent failure).
    """
    t0 = time.time()
    # silent=True: malformed JSON yields None (-> 400) instead of an HTML 500.
    data = request.get_json(force=True, silent=True)
    if not data or not data.get('query'):
        return jsonify({'error': 'Missing required field: query'}), 400

    logging.info(f"Plot request: model={data.get('model','qwen')} query={data.get('query')[:80]}")
    try:
        result = agent.process_request(data)
    except Exception as e:
        # Surface agent failures as JSON so the frontend can display them,
        # instead of Flask's default HTML 500 page.
        logging.exception("Plot request failed")
        return jsonify({'error': str(e)}), 500
    logging.info(f"Plot completed in {time.time() - t0:.2f}s")
    return jsonify(result)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
@app.route('/static/<path:filename>')
def serve_static(filename):
    """Serve a generated chart asset with permissive CORS and short-lived caching."""
    response = send_from_directory(app.static_folder, filename)
    response.headers['Access-Control-Allow-Origin'] = '*'
    response.headers['Cache-Control'] = 'public, max-age=300'
    return response
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept a CSV/XLS/XLSX upload and return a schema preview.

    The file is saved under UPLOAD_FOLDER and loaded with DataProcessor;
    the response includes columns, dtypes, a 5-row preview, the stored
    path (used by later /plot and /stats calls), and the row count.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part in request'}), 400
    file = request.files['file']
    if not file.filename:
        return jsonify({'error': 'No file selected'}), 400
    if not allowed_file(file.filename):
        return jsonify({'error': 'File type not allowed. Use CSV, XLS, or XLSX'}), 400

    filename = secure_filename(file.filename)
    # secure_filename strips path separators and can reduce a hostile name
    # (e.g. "../../") to an empty string — reject instead of saving to the
    # upload directory itself.
    if not filename:
        return jsonify({'error': 'Invalid filename'}), 400
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(file_path)

    dp = DataProcessor(file_path)
    return jsonify({
        'message': 'File uploaded successfully',
        'columns': dp.get_columns(),
        'dtypes': dp.get_dtypes(),
        'preview': dp.preview(5),
        'file_path': file_path,
        'row_count': len(dp.data),
    })
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
@app.route('/stats', methods=['POST'])
def stats():
    """Return columns, dtypes, summary stats, and row count.

    Uses the file referenced by "file_path" in the JSON body when it
    exists; otherwise falls back to the agent's current dataset.
    """
    payload = request.get_json(force=True) or {}
    file_path = payload.get('file_path')
    if file_path and os.path.exists(file_path):
        dp = DataProcessor(file_path)
    else:
        dp = agent.data_processor
    return jsonify({
        'columns': dp.get_columns(),
        'dtypes': dp.get_dtypes(),
        'stats': dp.get_stats(),
        'row_count': len(dp.data),
    })
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
@app.errorhandler(413)
def file_too_large(e):
    """Translate Flask's 413 (body over MAX_CONTENT_LENGTH) into a JSON error."""
    limit_mb = MAX_UPLOAD_BYTES // (1024 * 1024)
    return jsonify({'error': f'File too large. Maximum size is {limit_mb} MB'}), 413
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
if __name__ == '__main__':
    # HF Spaces expects the app to listen on 0.0.0.0:7860.
    app.run(host='0.0.0.0', port=7860)
|
chart_generator.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
import os
import time
import uuid

import matplotlib
matplotlib.use("Agg")  # headless backend: render PNGs without a display; must precede pyplot import
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go

logger = logging.getLogger(__name__)

# Shared dark-theme Plotly layout merged into every generated chart spec.
# NOTE(review): "font_color" is plotly.py magic-underscore shorthand; this dict
# is shipped raw to the frontend, so confirm plotly.js accepts it (vs. font.color).
_PLOTLY_LAYOUT = dict(
    font=dict(family="Inter, system-ui, sans-serif", size=13),
    plot_bgcolor="#0f1117",
    paper_bgcolor="#0f1117",
    font_color="#e2e8f0",
    margin=dict(l=60, r=30, t=60, b=60),
    legend=dict(bgcolor="rgba(0,0,0,0)", borderwidth=0),
    xaxis=dict(gridcolor="#1e2d3d", linecolor="#2d3748", zerolinecolor="#2d3748"),
    yaxis=dict(gridcolor="#1e2d3d", linecolor="#2d3748", zerolinecolor="#2d3748"),
    colorway=["#4f8cff", "#34d399", "#f59e0b", "#ef4444", "#a78bfa", "#06b6d4"],
)
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class ChartGenerator:
    """Renders a chart twice from the same arguments: a static PNG via
    matplotlib (saved under static/images) and an interactive Plotly
    spec (plain dict) for the frontend.

    Supported chart types: line (default), bar, scatter, area,
    histogram, box, pie.
    """

    def __init__(self, data=None):
        """Use *data* (a non-empty DataFrame) or fall back to the bundled sample CSV."""
        logger.info("Initializing ChartGenerator")
        if data is not None and not (isinstance(data, pd.DataFrame) and data.empty):
            self.data = data
        else:
            default_csv = os.path.join(
                os.path.dirname(__file__), "data", "sample_data.csv"
            )
            self.data = pd.read_csv(default_csv) if os.path.exists(default_csv) else pd.DataFrame()

    # -----------------------------------------------------------------------
    # Public
    # -----------------------------------------------------------------------

    def generate_chart(self, plot_args: dict) -> dict:
        """Return {"chart_path": str, "chart_spec": dict}.

        *plot_args* must contain "x" (column name) and "y" (list of column
        names); "chart_type" and "color" are optional.

        Raises ValueError when a requested column is missing, KeyError when
        "x" or "y" is absent from *plot_args*.
        """
        t0 = time.time()
        logger.info(f"Generating chart: {plot_args}")

        x_col = plot_args["x"]
        y_cols = plot_args["y"]
        chart_type = plot_args.get("chart_type", "line")
        color = plot_args.get("color", None)

        self._validate_columns(x_col, y_cols)

        chart_path = self._save_matplotlib(x_col, y_cols, chart_type, color)
        chart_spec = self._build_plotly_spec(x_col, y_cols, chart_type, color)

        logger.info(f"Chart ready in {time.time() - t0:.2f}s")
        return {"chart_path": chart_path, "chart_spec": chart_spec}

    # -----------------------------------------------------------------------
    # Validation
    # -----------------------------------------------------------------------

    def _validate_columns(self, x_col: str, y_cols: list):
        """Raise ValueError when any requested column is absent from the data."""
        missing = [c for c in [x_col] + y_cols if c not in self.data.columns]
        if missing:
            raise ValueError(
                f"Columns not found in data: {missing}. "
                f"Available: {list(self.data.columns)}"
            )

    # -----------------------------------------------------------------------
    # Matplotlib (static PNG)
    # -----------------------------------------------------------------------

    def _save_matplotlib(self, x_col, y_cols, chart_type, color) -> str:
        """Render the chart with matplotlib and save a PNG.

        Returns the saved path relative to the project root
        ("static/images/chart_<uuid>.png").
        """
        plt.clf()
        plt.close("all")  # defensive: drop any figures left over from earlier requests
        fig, ax = plt.subplots(figsize=(10, 6))
        fig.patch.set_facecolor("#0f1117")
        ax.set_facecolor("#0f1117")

        palette = ["#4f8cff", "#34d399", "#f59e0b", "#ef4444", "#a78bfa"]
        x = self.data[x_col]

        for i, y_col in enumerate(y_cols):
            # An explicit color (if given) applies to every series; otherwise
            # cycle through the palette.
            c = color or palette[i % len(palette)]
            y = self.data[y_col]
            if chart_type == "bar":
                ax.bar(x, y, label=y_col, color=c, alpha=0.85)
            elif chart_type == "scatter":
                ax.scatter(x, y, label=y_col, color=c, alpha=0.8)
            elif chart_type == "area":
                ax.fill_between(x, y, label=y_col, color=c, alpha=0.4)
                ax.plot(x, y, color=c)
            elif chart_type == "histogram":
                ax.hist(y, label=y_col, color=c, alpha=0.8, bins="auto", edgecolor="#1e2d3d")
            elif chart_type == "box":
                # A single boxplot call draws every y column at once, so stop looping.
                ax.boxplot(
                    [self.data[y_col].dropna().values for y_col in y_cols],
                    labels=y_cols,
                    patch_artist=True,
                    boxprops=dict(facecolor=c, color="#e2e8f0"),
                    medianprops=dict(color="#f59e0b", linewidth=2),
                )
                break
            elif chart_type == "pie":
                # Pie uses the x column as slice labels and only the first y column.
                ax.pie(
                    y, labels=x, autopct="%1.1f%%",
                    colors=palette, startangle=90,
                    wedgeprops=dict(edgecolor="#0f1117"),
                )
                ax.set_aspect("equal")
                break
            else:  # default: line
                ax.plot(x, y, label=y_col, color=c, marker="o", linewidth=2)

        for spine in ax.spines.values():
            spine.set_edgecolor("#2d3748")
        ax.tick_params(colors="#94a3b8")
        ax.xaxis.label.set_color("#94a3b8")
        ax.yaxis.label.set_color("#94a3b8")
        ax.set_xlabel(x_col, fontsize=11)
        ax.set_ylabel(" / ".join(y_cols), fontsize=11)
        ax.set_title(f"{chart_type.title()} \u2014 {', '.join(y_cols)} vs {x_col}",
                     color="#e2e8f0", fontsize=13, pad=12)
        ax.grid(True, alpha=0.15, color="#1e2d3d")
        if chart_type not in ("pie", "histogram"):
            ax.legend(facecolor="#161b27", edgecolor="#2d3748", labelcolor="#e2e8f0")
        if chart_type not in ("pie", "histogram", "box") and len(x) > 5:
            plt.xticks(rotation=45, ha="right")

        output_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "static", "images")
        os.makedirs(output_dir, exist_ok=True)
        # Unique name per render so concurrent requests never clobber each other.
        filename = f"chart_{uuid.uuid4().hex[:12]}.png"
        full_path = os.path.join(output_dir, filename)
        plt.savefig(full_path, dpi=150, bbox_inches="tight", facecolor=fig.get_facecolor())
        plt.close(fig)
        logger.info(f"Saved PNG: {full_path} ({os.path.getsize(full_path)} bytes)")
        return os.path.join("static", "images", filename)

    # -----------------------------------------------------------------------
    # Plotly (interactive JSON spec for frontend)
    # -----------------------------------------------------------------------

    def _build_plotly_spec(self, x_col, y_cols, chart_type, color) -> dict:
        """Return a plain {"data": [...], "layout": {...}} dict for plotly.js."""
        palette = ["#4f8cff", "#34d399", "#f59e0b", "#ef4444", "#a78bfa"]
        x = self.data[x_col].tolist()
        traces = []

        for i, y_col in enumerate(y_cols):
            c = color or palette[i % len(palette)]
            y = self.data[y_col].tolist()

            if chart_type == "bar":
                traces.append(go.Bar(x=x, y=y, name=y_col, marker_color=c, opacity=0.85).to_plotly_json())
            elif chart_type == "scatter":
                traces.append(go.Scatter(x=x, y=y, name=y_col, mode="markers",
                                         marker=dict(color=c, size=8, opacity=0.8)).to_plotly_json())
            elif chart_type == "area":
                traces.append(go.Scatter(x=x, y=y, name=y_col, mode="lines",
                                         fill="tozeroy", line=dict(color=c)).to_plotly_json())
            elif chart_type == "histogram":
                traces.append(go.Histogram(x=y, name=y_col, marker_color=c, opacity=0.8).to_plotly_json())
            elif chart_type == "box":
                traces.append(go.Box(y=y, name=y_col, marker_color=c,
                                     line_color="#e2e8f0", fillcolor=c).to_plotly_json())
            elif chart_type == "pie":
                traces.append(go.Pie(labels=x, values=y, name=y_col,
                                     marker=dict(colors=palette)).to_plotly_json())
                break
            else:  # line
                traces.append(go.Scatter(x=x, y=y, name=y_col, mode="lines+markers",
                                         line=dict(color=c, width=2),
                                         marker=dict(size=6)).to_plotly_json())

        # BUG FIX: _PLOTLY_LAYOUT already contains "xaxis"/"yaxis" keys, so the
        # previous dict(**_PLOTLY_LAYOUT, xaxis=..., yaxis=...) raised
        # TypeError ("got multiple values for keyword argument 'xaxis'").
        # Strip the axis entries from the base before overriding them.
        base_layout = {k: v for k, v in _PLOTLY_LAYOUT.items() if k not in ("xaxis", "yaxis")}
        layout = dict(
            **base_layout,
            title=dict(
                text=f"{chart_type.title()} \u2014 {', '.join(y_cols)} vs {x_col}",
                font=dict(size=15, color="#e2e8f0"),
            ),
            xaxis=dict(**_PLOTLY_LAYOUT["xaxis"], title=x_col),
            yaxis=dict(**_PLOTLY_LAYOUT["yaxis"], title=" / ".join(y_cols)),
        )

        return {"data": traces, "layout": layout}
|
data/readme
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
data/sample_data.csv
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Year,Sales,Employee expense,Net profit,EBITDA,EBIT,RoCE,interest,WC %
|
| 2 |
+
2020,1000,200,100,300,250,10,50,5
|
| 3 |
+
2021,1200,220,150,350,300,12,55,6
|
| 4 |
+
2022,1400,250,200,400,350,15,60,7
|
| 5 |
+
2023,1600,270,250,450,400,18,65,8
|
data/train_data.csv
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
query,arguments
|
| 2 |
+
plot the sales in the years with red line,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line', 'color': 'red'}"
|
| 3 |
+
show employee expenses and net profit over the years,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
|
| 4 |
+
display the EBITDA for each year with a blue bar,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar', 'color': 'blue'}"
|
| 5 |
+
plot the RoCE over time,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line'}"
|
| 6 |
+
show the interest payments each year with a green bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'green'}"
|
| 7 |
+
display the working capital percentage over the years,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
| 8 |
+
plot the EBIT for each year with an orange line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'orange'}"
|
| 9 |
+
show sales and EBIT over the years,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
|
| 10 |
+
display the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
|
| 11 |
+
plot the employee expenses each year with a red line,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line', 'color': 'red'}"
|
| 12 |
+
show the annual sales in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
|
| 13 |
+
display EBIT and EBITDA over the years,"{'x': 'Year', 'y': ['EBIT', 'EBITDA'], 'chart_type': 'line'}"
|
| 14 |
+
plot the RoCE for each year with a purple line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'purple'}"
|
| 15 |
+
show the interest and working capital percentage,"{'x': 'Year', 'y': ['interest', 'WC %'], 'chart_type': 'line'}"
|
| 16 |
+
display the annual net profit with a blue bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'blue'}"
|
| 17 |
+
plot the sales and employee expenses in a line chart,"{'x': 'Year', 'y': ['Sales', 'Employee expense'], 'chart_type': 'line'}"
|
| 18 |
+
show the EBITDA for each year with a green bar,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar', 'color': 'green'}"
|
| 19 |
+
display the EBIT over time with an orange line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'orange'}"
|
| 20 |
+
plot the net profit each year with a red bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'red'}"
|
| 21 |
+
show the employee expenses in a line chart,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
|
| 22 |
+
display the annual interest payments with a blue line,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line', 'color': 'blue'}"
|
| 23 |
+
plot the RoCE and WC % over the years,"{'x': 'Year', 'y': ['RoCE', 'WC %'], 'chart_type': 'line'}"
|
| 24 |
+
show the sales each year with an orange bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'orange'}"
|
| 25 |
+
display EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
|
| 26 |
+
plot the employee expenses and EBIT,"{'x': 'Year', 'y': ['Employee expense', 'EBIT'], 'chart_type': 'line'}"
|
| 27 |
+
show the working capital percentage each year,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
| 28 |
+
display the RoCE in a bar chart,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'bar'}"
|
| 29 |
+
plot the annual sales with a green line,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line', 'color': 'green'}"
|
| 30 |
+
show the EBIT and interest over time,"{'x': 'Year', 'y': ['EBIT', 'interest'], 'chart_type': 'line'}"
|
| 31 |
+
display the net profit each year with a purple bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'purple'}"
|
| 32 |
+
plot the employee expenses over the years,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
|
| 33 |
+
show the EBITDA in a line chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
|
| 34 |
+
display EBIT and RoCE,"{'x': 'Year', 'y': ['EBIT', 'RoCE'], 'chart_type': 'line'}"
|
| 35 |
+
plot the sales and net profit each year,"{'x': 'Year', 'y': ['Sales', 'Net profit'], 'chart_type': 'line'}"
|
| 36 |
+
show the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
| 37 |
+
display the working capital percentage with a red bar,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'bar', 'color': 'red'}"
|
| 38 |
+
plot the RoCE for each year with a blue line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'blue'}"
|
| 39 |
+
show the sales over the years in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
|
| 40 |
+
display EBITDA and employee expenses,"{'x': 'Year', 'y': ['EBITDA', 'Employee expense'], 'chart_type': 'line'}"
|
| 41 |
+
plot the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
|
| 42 |
+
show the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
| 43 |
+
display the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
|
| 44 |
+
plot the EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
|
| 45 |
+
show the RoCE each year with an orange line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'orange'}"
|
| 46 |
+
display the sales and EBIT over time,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
|
| 47 |
+
plot the working capital percentage annually,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
| 48 |
+
show the interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
|
| 49 |
+
display the annual net profit,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
| 50 |
+
plot the employee expenses each year,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
|
| 51 |
+
show the EBITDA in a bar chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar'}"
|
| 52 |
+
display the EBIT with a red line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'red'}"
|
| 53 |
+
plot the sales each year with an orange bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'orange'}"
|
| 54 |
+
show the employee expenses and net profit,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
|
| 55 |
+
display the RoCE and working capital percentage,"{'x': 'Year', 'y': ['RoCE', 'WC %'], 'chart_type': 'line'}"
|
| 56 |
+
plot the EBITDA and EBIT,"{'x': 'Year', 'y': ['EBITDA', 'EBIT'], 'chart_type': 'line'}"
|
| 57 |
+
show the sales in a line chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}"
|
| 58 |
+
display the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
| 59 |
+
plot the annual interest payments,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
| 60 |
+
show the RoCE over time,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line'}"
|
| 61 |
+
display the working capital percentage,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
| 62 |
+
plot the EBITDA each year,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
|
| 63 |
+
show the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
|
| 64 |
+
display the EBIT and RoCE,"{'x': 'Year', 'y': ['EBIT', 'RoCE'], 'chart_type': 'line'}"
|
| 65 |
+
plot the sales and net profit each year,"{'x': 'Year', 'y': ['Sales', 'Net profit'], 'chart_type': 'line'}"
|
| 66 |
+
show the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
| 67 |
+
display the working capital percentage with a red bar,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'bar', 'color': 'red'}"
|
| 68 |
+
plot the RoCE for each year with a blue line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'blue'}"
|
| 69 |
+
show the sales over the years in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
|
| 70 |
+
display EBITDA and employee expenses,"{'x': 'Year', 'y': ['EBITDA', 'Employee expense'], 'chart_type': 'line'}"
|
| 71 |
+
plot the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
|
| 72 |
+
show the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
| 73 |
+
display the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
|
| 74 |
+
plot the EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
|
| 75 |
+
show the RoCE each year with an orange line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'orange'}"
|
| 76 |
+
display the sales and EBIT over time,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
|
| 77 |
+
plot the working capital percentage annually,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
| 78 |
+
show the interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
|
| 79 |
+
display the annual net profit,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
| 80 |
+
plot the EBIT and sales over time,"{'x': 'Year', 'y': ['EBIT', 'Sales'], 'chart_type': 'line'}"
|
| 81 |
+
show the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
|
| 82 |
+
display the employee expenses and EBITDA over the years,"{'x': 'Year', 'y': ['Employee expense', 'EBITDA'], 'chart_type': 'line'}"
|
| 83 |
+
plot the RoCE for each year with a red line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'red'}"
|
| 84 |
+
show the interest payments each year,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
| 85 |
+
display the working capital percentage over time,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
| 86 |
+
plot the EBIT for each year with a blue line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'blue'}"
|
| 87 |
+
show sales and EBITDA over time,"{'x': 'Year', 'y': ['Sales', 'EBITDA'], 'chart_type': 'line'}"
|
| 88 |
+
display the net profit and RoCE,"{'x': 'Year', 'y': ['Net profit', 'RoCE'], 'chart_type': 'line'}"
|
| 89 |
+
plot the employee expenses and net profit each year,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
|
| 90 |
+
show the EBITDA in a line chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
|
| 91 |
+
display the sales and EBIT over the years,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
|
| 92 |
+
plot the working capital percentage with a red line,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line', 'color': 'red'}"
|
| 93 |
+
show the annual interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
|
| 94 |
+
display the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
|
| 95 |
+
plot the sales each year with a green bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'green'}"
|
| 96 |
+
show the employee expenses and net profit over time,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
|
| 97 |
+
display the RoCE and EBITDA,"{'x': 'Year', 'y': ['RoCE', 'EBITDA'], 'chart_type': 'line'}"
|
| 98 |
+
plot the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
| 99 |
+
show the EBIT and sales each year,"{'x': 'Year', 'y': ['EBIT', 'Sales'], 'chart_type': 'line'}"
|
| 100 |
+
display the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
|
data_processor.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import os
|
| 3 |
+
import logging
|
| 4 |
+
|
| 5 |
+
class DataProcessor:
    """Loads a tabular file (CSV/XLS/XLSX) and exposes simple
    introspection helpers: columns, coarse dtypes, row preview,
    and numeric summary statistics."""

    def __init__(self, data_path=None):
        logging.info("Initializing DataProcessor")
        # Fall back to the bundled sample dataset when no (existing) path is given.
        default_path = os.path.join(os.path.dirname(__file__), 'data', 'sample_data.csv')
        self.data_path = data_path if data_path and os.path.exists(data_path) else default_path
        self.data = self.load_data(self.data_path)

    def load_data(self, path):
        """Read *path* into a DataFrame; return an empty frame on any failure."""
        ext = os.path.splitext(path)[1].lower()
        try:
            if ext == '.csv':
                frame = pd.read_csv(path)
            elif ext in ('.xls', '.xlsx'):
                frame = pd.read_excel(path)
            else:
                raise ValueError(f"Unsupported file type: {ext}")
            logging.info(f"Loaded data from {path} with shape {frame.shape}")
            return frame
        except Exception as e:
            # Best-effort loader: callers get an empty DataFrame rather than a crash.
            logging.error(f"Failed to load data: {e}")
            return pd.DataFrame()

    def validate_columns(self, required_columns):
        """Return (ok, missing_columns) for *required_columns* against the data."""
        missing = [name for name in required_columns if name not in self.data.columns]
        if missing:
            logging.warning(f"Missing columns: {missing}")
        return (not missing), missing

    def get_columns(self):
        """Column names, in order."""
        return list(self.data.columns)

    def preview(self, n=5):
        """First *n* rows as a list of record dicts."""
        return self.data.head(n).to_dict(orient='records')

    def get_dtypes(self) -> dict:
        """Map each column to a coarse type label:
        integer / float / datetime / boolean / string."""
        def label(dtype):
            if pd.api.types.is_integer_dtype(dtype):
                return "integer"
            if pd.api.types.is_float_dtype(dtype):
                return "float"
            if pd.api.types.is_datetime64_any_dtype(dtype):
                return "datetime"
            if pd.api.types.is_bool_dtype(dtype):
                return "boolean"
            return "string"
        return {col: label(dtype) for col, dtype in self.data.dtypes.items()}

    def get_stats(self) -> dict:
        """describe() over numeric columns, values rounded to 4 places;
        empty dict when there are no numeric columns."""
        numeric = self.data.select_dtypes(include='number')
        if numeric.empty:
            return {}
        return {
            col: {name: round(value, 4) for name, value in column_stats.items()}
            for col, column_stats in numeric.describe().to_dict().items()
        }
|
| 64 |
+
|
deploy_backend.ps1
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Deploy helper: strip everything except the backend/ tree from the git index,
# then commit. Intended to prepare a backend-only commit for the HF Space.
$ErrorActionPreference = 'Stop'
# All files currently tracked by git.
$files = git ls-files
foreach ($f in $files) {
    # Stage removal of anything outside backend/ (keep the backend dir itself).
    if ($f -notlike 'backend/*' -and $f -ne 'backend') {
        git rm -q --ignore-unmatch -- "$f"
    }
}
Write-Output "-- staged removals --"
git status --porcelain=2 --branch
# If nothing got staged, still create an (empty) marker commit so the deploy
# pipeline has a commit to push.
$staged = git diff --staged --name-only
if (-not $staged) {
    git commit --allow-empty -m 'Deploy: keep only backend (empty commit if no removals)'
} else {
    git commit -m 'Deploy: keep only backend'
}
Write-Output "-- commit done --"
|
fine-tuned-bart-large/README.md
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
library_name: transformers
|
| 4 |
+
language: en
|
| 5 |
+
datasets:
|
| 6 |
+
- your-dataset-name
|
| 7 |
+
metrics:
|
| 8 |
+
- rouge
|
| 9 |
+
base_model: facebook/bart-large
|
| 10 |
+
tags:
|
| 11 |
+
- text2text-generation
|
| 12 |
+
- summarization
|
| 13 |
+
- fine-tuned
|
| 14 |
+
pipeline_tag: summarization
|
| 15 |
+
model-index:
|
| 16 |
+
- name: fine-tuned-bart-large
|
| 17 |
+
results:
|
| 18 |
+
- task: summarization
|
| 19 |
+
dataset: your-dataset-name
|
| 20 |
+
metrics:
|
| 21 |
+
- rouge1: 0.45
|
| 22 |
+
- rouge2: 0.22
|
| 23 |
+
- rougel: 0.40
|
| 24 |
+
---
|
| 25 |
+
|
| 26 |
+
# Fine-tuned BART Large Model
|
| 27 |
+
|
| 28 |
+
This repository contains a fine-tuned BART large model for text summarization tasks.
|
| 29 |
+
|
| 30 |
+
## Model Details
|
| 31 |
+
|
| 32 |
+
- Base model: facebook/bart-large
|
| 33 |
+
- Fine-tuned on: your-dataset-name
|
| 34 |
+
- License: MIT
|
| 35 |
+
|
| 36 |
+
## Usage
|
| 37 |
+
|
| 38 |
+
You can load this model using the Hugging Face Transformers library:
|
| 39 |
+
|
| 40 |
+
```python
|
| 41 |
+
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
| 42 |
+
|
| 43 |
+
model_name = "ArchCoder/fine-tuned-bart-large"
|
| 44 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 45 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
If you host the model under a different account or name, replace `"ArchCoder/fine-tuned-bart-large"` with your own model repo name.
|
| 49 |
+
|
| 50 |
+
## Evaluation
|
| 51 |
+
|
| 52 |
+
The model was evaluated on the your-dataset-name dataset with the following metrics:
|
| 53 |
+
|
| 54 |
+
- ROUGE-1: 0.45
|
| 55 |
+
- ROUGE-2: 0.22
|
| 56 |
+
- ROUGE-L: 0.40
|
| 57 |
+
|
| 58 |
+
## License
|
| 59 |
+
|
| 60 |
+
This model is licensed under the MIT License.
|
fine-tuned-bart-large/config.json
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "facebook/bart-large",
|
| 3 |
+
"activation_dropout": 0.1,
|
| 4 |
+
"activation_function": "gelu",
|
| 5 |
+
"add_bias_logits": false,
|
| 6 |
+
"add_final_layer_norm": false,
|
| 7 |
+
"architectures": [
|
| 8 |
+
"BartForConditionalGeneration"
|
| 9 |
+
],
|
| 10 |
+
"attention_dropout": 0.1,
|
| 11 |
+
"bos_token_id": 0,
|
| 12 |
+
"classif_dropout": 0.1,
|
| 13 |
+
"classifier_dropout": 0.0,
|
| 14 |
+
"d_model": 1024,
|
| 15 |
+
"decoder_attention_heads": 16,
|
| 16 |
+
"decoder_ffn_dim": 4096,
|
| 17 |
+
"decoder_layerdrop": 0.0,
|
| 18 |
+
"decoder_layers": 12,
|
| 19 |
+
"decoder_start_token_id": 2,
|
| 20 |
+
"dropout": 0.1,
|
| 21 |
+
"early_stopping": true,
|
| 22 |
+
"encoder_attention_heads": 16,
|
| 23 |
+
"encoder_ffn_dim": 4096,
|
| 24 |
+
"encoder_layerdrop": 0.0,
|
| 25 |
+
"encoder_layers": 12,
|
| 26 |
+
"eos_token_id": 2,
|
| 27 |
+
"forced_bos_token_id": 0,
|
| 28 |
+
"forced_eos_token_id": 2,
|
| 29 |
+
"gradient_checkpointing": false,
|
| 30 |
+
"id2label": {
|
| 31 |
+
"0": "LABEL_0",
|
| 32 |
+
"1": "LABEL_1",
|
| 33 |
+
"2": "LABEL_2"
|
| 34 |
+
},
|
| 35 |
+
"init_std": 0.02,
|
| 36 |
+
"is_encoder_decoder": true,
|
| 37 |
+
"label2id": {
|
| 38 |
+
"LABEL_0": 0,
|
| 39 |
+
"LABEL_1": 1,
|
| 40 |
+
"LABEL_2": 2
|
| 41 |
+
},
|
| 42 |
+
"max_position_embeddings": 1024,
|
| 43 |
+
"model_type": "bart",
|
| 44 |
+
"no_repeat_ngram_size": 3,
|
| 45 |
+
"normalize_before": false,
|
| 46 |
+
"num_beams": 4,
|
| 47 |
+
"num_hidden_layers": 12,
|
| 48 |
+
"pad_token_id": 1,
|
| 49 |
+
"scale_embedding": false,
|
| 50 |
+
"task_specific_params": {
|
| 51 |
+
"summarization": {
|
| 52 |
+
"length_penalty": 1.0,
|
| 53 |
+
"max_length": 128,
|
| 54 |
+
"min_length": 12,
|
| 55 |
+
"num_beams": 4
|
| 56 |
+
},
|
| 57 |
+
"summarization_cnn": {
|
| 58 |
+
"length_penalty": 2.0,
|
| 59 |
+
"max_length": 142,
|
| 60 |
+
"min_length": 56,
|
| 61 |
+
"num_beams": 4
|
| 62 |
+
},
|
| 63 |
+
"summarization_xsum": {
|
| 64 |
+
"length_penalty": 1.0,
|
| 65 |
+
"max_length": 62,
|
| 66 |
+
"min_length": 11,
|
| 67 |
+
"num_beams": 6
|
| 68 |
+
}
|
| 69 |
+
},
|
| 70 |
+
"torch_dtype": "float32",
|
| 71 |
+
"transformers_version": "4.42.3",
|
| 72 |
+
"use_cache": true,
|
| 73 |
+
"vocab_size": 50265
|
| 74 |
+
}
|
fine-tuned-bart-large/generation_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 0,
|
| 4 |
+
"decoder_start_token_id": 2,
|
| 5 |
+
"early_stopping": true,
|
| 6 |
+
"eos_token_id": 2,
|
| 7 |
+
"forced_bos_token_id": 0,
|
| 8 |
+
"forced_eos_token_id": 2,
|
| 9 |
+
"no_repeat_ngram_size": 3,
|
| 10 |
+
"num_beams": 4,
|
| 11 |
+
"pad_token_id": 1,
|
| 12 |
+
"transformers_version": "4.42.3"
|
| 13 |
+
}
|
fine-tuned-bart-large/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fine-tuned-bart-large/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"cls_token": "<s>",
|
| 4 |
+
"eos_token": "</s>",
|
| 5 |
+
"mask_token": {
|
| 6 |
+
"content": "<mask>",
|
| 7 |
+
"lstrip": true,
|
| 8 |
+
"normalized": true,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"pad_token": "<pad>",
|
| 13 |
+
"sep_token": "</s>",
|
| 14 |
+
"unk_token": "<unk>"
|
| 15 |
+
}
|
fine-tuned-bart-large/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fine-tuned-bart-large/tokenizer_config.json
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"0": {
|
| 5 |
+
"content": "<s>",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": true,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
},
|
| 12 |
+
"1": {
|
| 13 |
+
"content": "<pad>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": true,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false,
|
| 18 |
+
"special": true
|
| 19 |
+
},
|
| 20 |
+
"2": {
|
| 21 |
+
"content": "</s>",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": true,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false,
|
| 26 |
+
"special": true
|
| 27 |
+
},
|
| 28 |
+
"3": {
|
| 29 |
+
"content": "<unk>",
|
| 30 |
+
"lstrip": false,
|
| 31 |
+
"normalized": true,
|
| 32 |
+
"rstrip": false,
|
| 33 |
+
"single_word": false,
|
| 34 |
+
"special": true
|
| 35 |
+
},
|
| 36 |
+
"50264": {
|
| 37 |
+
"content": "<mask>",
|
| 38 |
+
"lstrip": true,
|
| 39 |
+
"normalized": true,
|
| 40 |
+
"rstrip": false,
|
| 41 |
+
"single_word": false,
|
| 42 |
+
"special": true
|
| 43 |
+
}
|
| 44 |
+
},
|
| 45 |
+
"bos_token": "<s>",
|
| 46 |
+
"clean_up_tokenization_spaces": true,
|
| 47 |
+
"cls_token": "<s>",
|
| 48 |
+
"eos_token": "</s>",
|
| 49 |
+
"errors": "replace",
|
| 50 |
+
"mask_token": "<mask>",
|
| 51 |
+
"model_max_length": 1024,
|
| 52 |
+
"pad_token": "<pad>",
|
| 53 |
+
"sep_token": "</s>",
|
| 54 |
+
"tokenizer_class": "BartTokenizer",
|
| 55 |
+
"trim_offsets": true,
|
| 56 |
+
"unk_token": "<unk>"
|
| 57 |
+
}
|
fine-tuned-bart-large/training_args.bin
ADDED
|
Binary file (5.24 kB). View file
|
|
|
fine-tuned-bart-large/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
image_verifier.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from PIL import Image
|
| 2 |
+
import torch
|
| 3 |
+
from transformers import CLIPProcessor, CLIPModel
|
| 4 |
+
import os
|
| 5 |
+
import logging
|
| 6 |
+
import time
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
|
| 9 |
+
load_dotenv()
|
| 10 |
+
|
| 11 |
+
class ImageVerifier:
    """Check that a generated chart image matches the user's query using CLIP."""

    # Neutral caption the query must out-score for verification to pass.
    _BASELINE_PROMPT = "an unrelated image"

    def __init__(self):
        # Loads openai/clip-vit-base-patch32 eagerly (network/disk access).
        logging.info("Initializing ImageVerifier")
        self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    def verify(self, image_path, query):
        """Return True if the image at *image_path* matches *query*.

        *image_path* is resolved relative to the parent of this file's
        directory (absolute paths pass through os.path.join unchanged).
        """
        start_time = time.time()
        logging.info(f"Verifying image {image_path} with query: {query}")

        full_image_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), image_path)

        image = Image.open(full_image_path)

        # BUG FIX: the original scored only [query]; softmax over a single
        # candidate is always 1.0 and argmax() is always 0, so verify()
        # unconditionally returned True. Score the query against a neutral
        # baseline caption so the comparison is meaningful.
        texts = [query, self._BASELINE_PROMPT]
        inputs = self.processor(text=texts, images=image, return_tensors="pt", padding=True)
        with torch.no_grad():  # inference only; skip autograd bookkeeping
            outputs = self.model(**inputs)
        logits_per_image = outputs.logits_per_image
        probs = logits_per_image.softmax(dim=1)

        # Index 0 is the user query; it must beat the baseline caption.
        verification_result = probs.argmax().item() == 0
        end_time = time.time()

        logging.info(f"Image verification result: {verification_result} in {end_time - start_time} seconds")
        return verification_result
|
llm_agent.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ast
|
| 2 |
+
import json
|
| 3 |
+
import logging
|
| 4 |
+
import os
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
|
| 9 |
+
from chart_generator import ChartGenerator
|
| 10 |
+
from data_processor import DataProcessor
|
| 11 |
+
|
| 12 |
+
load_dotenv()
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
# ---------------------------------------------------------------------------
|
| 17 |
+
# Prompt templates
|
| 18 |
+
# ---------------------------------------------------------------------------
|
| 19 |
+
|
| 20 |
+
_SYSTEM_PROMPT = (
|
| 21 |
+
"You are a data visualization expert. "
|
| 22 |
+
"Given the user request and the dataset schema provided, output ONLY a valid JSON "
|
| 23 |
+
"object — no explanation, no markdown fences, no extra text.\n\n"
|
| 24 |
+
"Required keys:\n"
|
| 25 |
+
' "x" : string — exact column name for the x-axis\n'
|
| 26 |
+
' "y" : array — one or more exact column names for the y-axis\n'
|
| 27 |
+
' "chart_type" : string — one of: line, bar, scatter, pie, histogram, box, area\n'
|
| 28 |
+
' "color" : string — optional CSS color, e.g. "red", "#4f8cff"\n\n'
|
| 29 |
+
"Rules:\n"
|
| 30 |
+
"- Use only column names that appear in the schema. Never invent names.\n"
|
| 31 |
+
"- For pie: y must contain exactly one column.\n"
|
| 32 |
+
"- For histogram/box: x may equal the first element of y.\n"
|
| 33 |
+
"- Default to line if chart type is ambiguous."
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _user_message(query: str, columns: list, dtypes: dict, sample_rows: list) -> str:
|
| 38 |
+
schema = "\n".join(f" - {c} ({dtypes.get(c, 'unknown')})" for c in columns)
|
| 39 |
+
samples = "".join(f" {json.dumps(r)}\n" for r in sample_rows[:3])
|
| 40 |
+
return (
|
| 41 |
+
f"Dataset columns:\n{schema}\n\n"
|
| 42 |
+
f"Sample rows (first 3):\n{samples}\n"
|
| 43 |
+
f"User request: {query}"
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# ---------------------------------------------------------------------------
|
| 48 |
+
# Output parsing & validation
|
| 49 |
+
# ---------------------------------------------------------------------------
|
| 50 |
+
|
| 51 |
+
def _parse_output(text: str):
|
| 52 |
+
text = text.strip()
|
| 53 |
+
if "```" in text:
|
| 54 |
+
for part in text.split("```"):
|
| 55 |
+
part = part.strip().lstrip("json").strip()
|
| 56 |
+
if part.startswith("{"):
|
| 57 |
+
text = part
|
| 58 |
+
break
|
| 59 |
+
try:
|
| 60 |
+
return json.loads(text)
|
| 61 |
+
except json.JSONDecodeError:
|
| 62 |
+
pass
|
| 63 |
+
try:
|
| 64 |
+
return ast.literal_eval(text)
|
| 65 |
+
except (SyntaxError, ValueError):
|
| 66 |
+
pass
|
| 67 |
+
return None
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def _validate(args: dict, columns: list):
|
| 71 |
+
if not isinstance(args, dict):
|
| 72 |
+
return None
|
| 73 |
+
if not all(k in args for k in ("x", "y", "chart_type")):
|
| 74 |
+
return None
|
| 75 |
+
if isinstance(args["y"], str):
|
| 76 |
+
args["y"] = [args["y"]]
|
| 77 |
+
valid = {"line", "bar", "scatter", "pie", "histogram", "box", "area"}
|
| 78 |
+
if args["chart_type"] not in valid:
|
| 79 |
+
args["chart_type"] = "line"
|
| 80 |
+
if args["x"] not in columns:
|
| 81 |
+
return None
|
| 82 |
+
if not all(c in columns for c in args["y"]):
|
| 83 |
+
return None
|
| 84 |
+
return args
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# ---------------------------------------------------------------------------
|
| 88 |
+
# Agent
|
| 89 |
+
# ---------------------------------------------------------------------------
|
| 90 |
+
|
| 91 |
+
class LLM_Agent:
    """Turns a natural-language chart request into a rendered chart.

    One agent wraps a DataProcessor (dataset access) and a ChartGenerator
    (rendering), plus lazily-loaded local models (Qwen, fine-tuned BART) and
    remote APIs (Gemini, Grok) that translate the request into plot args.
    """

    def __init__(self, data_path=None):
        logger.info("Initializing LLM_Agent")
        # Default dataset; process_request may swap these per request.
        self.data_processor = DataProcessor(data_path)
        self.chart_generator = ChartGenerator(self.data_processor.data)
        # Local models are loaded lazily on first use to keep startup fast.
        self._bart_tokenizer = None
        self._bart_model = None
        self._qwen_tokenizer = None
        self._qwen_model = None

    # -- model runners -------------------------------------------------------

    def _run_qwen(self, user_msg: str) -> str:
        """Run the local Qwen chat model on the prompt; returns raw text."""
        if self._qwen_model is None:
            # Lazy import + load: the first Qwen request pays the load cost.
            from transformers import AutoModelForCausalLM, AutoTokenizer
            model_id = "Qwen/Qwen2.5-1.5B-Instruct"
            logger.info("Loading Qwen model (first request)...")
            self._qwen_tokenizer = AutoTokenizer.from_pretrained(model_id)
            self._qwen_model = AutoModelForCausalLM.from_pretrained(model_id)
            logger.info("Qwen model loaded.")
        messages = [
            {"role": "system", "content": _SYSTEM_PROMPT},
            {"role": "user", "content": user_msg},
        ]
        text = self._qwen_tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        inputs = self._qwen_tokenizer(text, return_tensors="pt")
        outputs = self._qwen_model.generate(
            **inputs, max_new_tokens=256, temperature=0.1, do_sample=True
        )
        # Decode only the newly generated tokens (slice off the prompt).
        return self._qwen_tokenizer.decode(
            outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True
        )

    def _run_gemini(self, user_msg: str) -> str:
        """Call the Gemini API; raises ValueError if GEMINI_API_KEY is unset."""
        import google.generativeai as genai
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("GEMINI_API_KEY is not set")
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(
            "gemini-2.0-flash",
            system_instruction=_SYSTEM_PROMPT,
        )
        return model.generate_content(user_msg).text

    def _run_grok(self, user_msg: str) -> str:
        """Call xAI's Grok via the OpenAI-compatible endpoint.

        Raises ValueError if GROK_API_KEY is unset.
        """
        from openai import OpenAI
        api_key = os.getenv("GROK_API_KEY")
        if not api_key:
            raise ValueError("GROK_API_KEY is not set")
        client = OpenAI(api_key=api_key, base_url="https://api.x.ai/v1")
        resp = client.chat.completions.create(
            model="grok-3-mini",
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
            ],
            max_tokens=256,
            temperature=0.1,
        )
        return resp.choices[0].message.content

    def _run_bart(self, query: str) -> str:
        """Run the fine-tuned BART seq2seq model on the raw query.

        Unlike the chat runners, BART receives only the query text (it was
        fine-tuned on query -> plot-args pairs, not on the schema prompt).
        """
        if self._bart_model is None:
            from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
            model_id = "ArchCoder/fine-tuned-bart-large"
            logger.info("Loading BART model (first request)...")
            self._bart_tokenizer = AutoTokenizer.from_pretrained(model_id)
            self._bart_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
            logger.info("BART model loaded.")
        inputs = self._bart_tokenizer(
            query, return_tensors="pt", max_length=512, truncation=True
        )
        outputs = self._bart_model.generate(**inputs, max_length=100)
        return self._bart_tokenizer.decode(outputs[0], skip_special_tokens=True)

    # -- main entry point ----------------------------------------------------

    def process_request(self, data: dict) -> dict:
        """Handle one chart request end-to-end.

        *data* keys: "query" (text request), optional "file_path" (uploaded
        dataset), optional "model" ("qwen" | "gemini" | "grok" | "bart").
        Returns a dict with response/chart_path/chart_spec/verified/plot_args;
        any LLM failure falls back to default plot args rather than erroring.
        """
        t0 = time.time()
        query = data.get("query", "")
        data_path = data.get("file_path")
        model = data.get("model", "qwen")

        # Swap in the uploaded dataset for this (and subsequent) requests.
        if data_path and os.path.exists(data_path):
            self.data_processor = DataProcessor(data_path)
            self.chart_generator = ChartGenerator(self.data_processor.data)

        columns = self.data_processor.get_columns()
        dtypes = self.data_processor.get_dtypes()
        sample_rows = self.data_processor.preview(3)

        # Fallback used when the LLM output is unusable: first column on x,
        # second on y (or generic names for an empty dataset).
        default_args = {
            "x": columns[0] if columns else "Year",
            "y": [columns[1]] if len(columns) > 1 else ["Sales"],
            "chart_type": "line",
        }

        raw_text = ""
        plot_args = None
        try:
            user_msg = _user_message(query, columns, dtypes, sample_rows)
            if model == "gemini": raw_text = self._run_gemini(user_msg)
            elif model == "grok": raw_text = self._run_grok(user_msg)
            elif model == "bart": raw_text = self._run_bart(query)
            else: raw_text = self._run_qwen(user_msg)

            logger.info(f"LLM [{model}] output: {raw_text}")
            parsed = _parse_output(raw_text)
            plot_args = _validate(parsed, columns) if parsed else None
        except Exception as exc:
            # Any backend failure (missing key, network, model load) is
            # logged and absorbed; the default args keep the request alive.
            logger.error(f"LLM error [{model}]: {exc}")
            raw_text = str(exc)

        if not plot_args:
            logger.warning("Falling back to default plot args")
            plot_args = default_args

        try:
            chart_result = self.chart_generator.generate_chart(plot_args)
            chart_path = chart_result["chart_path"]
            chart_spec = chart_result["chart_spec"]
        except Exception as exc:
            # Chart failure is terminal for this request — report it.
            logger.error(f"Chart generation error: {exc}")
            return {
                "response": f"Chart generation failed: {exc}",
                "chart_path": "",
                "chart_spec": None,
                "verified": False,
                "plot_args": plot_args,
            }

        logger.info(f"Request processed in {time.time() - t0:.2f}s")
        return {
            "response": json.dumps(plot_args),
            "chart_path": chart_path,
            "chart_spec": chart_spec,
            "verified": True,
            "plot_args": plot_args,
        }
|
requirements.txt
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate
|
| 2 |
+
aiohttp
|
| 3 |
+
aiosignal
|
| 4 |
+
attrs
|
| 5 |
+
blinker
|
| 6 |
+
certifi
|
| 7 |
+
charset-normalizer
|
| 8 |
+
click
|
| 9 |
+
colorama
|
| 10 |
+
coloredlogs
|
| 11 |
+
contourpy
|
| 12 |
+
cycler
|
| 13 |
+
datasets
|
| 14 |
+
dill
|
| 15 |
+
et-xmlfile
|
| 16 |
+
filelock
|
| 17 |
+
Flask
|
| 18 |
+
Flask-Cors
|
| 19 |
+
fonttools
|
| 20 |
+
frozenlist
|
| 21 |
+
fsspec
|
| 22 |
+
google-generativeai>=0.8.0
|
| 23 |
+
huggingface-hub>=0.23.0
|
| 24 |
+
humanfriendly
|
| 25 |
+
idna
|
| 26 |
+
intel-openmp
|
| 27 |
+
itsdangerous
|
| 28 |
+
Jinja2
|
| 29 |
+
joblib
|
| 30 |
+
kiwisolver
|
| 31 |
+
MarkupSafe
|
| 32 |
+
matplotlib
|
| 33 |
+
mkl
|
| 34 |
+
mpmath
|
| 35 |
+
multidict
|
| 36 |
+
multiprocess
|
| 37 |
+
networkx
|
| 38 |
+
numpy
|
| 39 |
+
openai>=1.0.0
|
| 40 |
+
openpyxl
|
| 41 |
+
optimum
|
| 42 |
+
packaging
|
| 43 |
+
pandas
|
| 44 |
+
pillow
|
| 45 |
+
plotly>=5.18.0
|
| 46 |
+
protobuf
|
| 47 |
+
psutil
|
| 48 |
+
pyarrow
|
| 49 |
+
pyarrow-hotfix
|
| 50 |
+
pyparsing
|
| 51 |
+
pyreadline3
|
| 52 |
+
python-dateutil
|
| 53 |
+
python-dotenv
|
| 54 |
+
pytz
|
| 55 |
+
PyYAML
|
| 56 |
+
regex
|
| 57 |
+
requests
|
| 58 |
+
safetensors
|
| 59 |
+
scikit-learn
|
| 60 |
+
scipy
|
| 61 |
+
sentencepiece
|
| 62 |
+
six
|
| 63 |
+
sympy
|
| 64 |
+
tbb
|
| 65 |
+
threadpoolctl
|
| 66 |
+
tokenizers
|
| 67 |
+
torch
|
| 68 |
+
torchvision
|
| 69 |
+
tqdm
|
| 70 |
+
transformers>=4.36.0
|
| 71 |
+
typing_extensions
|
| 72 |
+
tzdata
|
| 73 |
+
urllib3
|
| 74 |
+
Werkzeug
|
| 75 |
+
xxhash
|
| 76 |
+
yarl
|
start.sh
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Start script for backend Flask app on Hugging Face Spaces

export FLASK_APP=app.py
export FLASK_ENV=production

# Run the Flask app on 0.0.0.0:7860
# NOTE(review): the Dockerfile in this repo copies everything to /app and runs
# `python app.py`, while this script runs `backend/app.py` — that path only
# exists when launched from the parent repo layout. Confirm which entry point
# the Space actually uses; one of the two is likely stale.
python backend/app.py
|
tracked_files.txt
ADDED
|
Binary file (10 Bytes). View file
|
|
|
train_model.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainer, Seq2SeqTrainingArguments
from sklearn.model_selection import train_test_split

# Fine-tune facebook/bart-large to translate natural-language chart requests
# ("query") into plot-argument strings ("arguments") from data/train_data.csv.
data = pd.read_csv('data/train_data.csv')
queries = data['query'].tolist()
arguments = data['arguments'].tolist()

# 80/20 train/eval split with a fixed seed for reproducibility.
train_queries, eval_queries, train_arguments, eval_arguments = train_test_split(queries, arguments, test_size=0.2, random_state=42)

tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large")

train_encodings = tokenizer(train_queries, truncation=True, padding=True)
eval_encodings = tokenizer(eval_queries, truncation=True, padding=True)

# NOTE(review): as_target_tokenizer() is deprecated in recent transformers;
# tokenizer(..., text_target=...) is the modern equivalent — confirm against
# the pinned transformers version before changing.
with tokenizer.as_target_tokenizer():
    train_labels = tokenizer(train_arguments, truncation=True, padding=True)
    eval_labels = tokenizer(eval_arguments, truncation=True, padding=True)

class PlotDataset(torch.utils.data.Dataset):
    """Pairs tokenized queries (model inputs) with tokenized arguments (labels)."""
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        # Tensorize one example; 'labels' carries the target token ids.
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels['input_ids'][idx])
        return item

    def __len__(self):
        return len(self.encodings.input_ids)

train_dataset = PlotDataset(train_encodings, train_labels)
eval_dataset = PlotDataset(eval_encodings, eval_labels)

# Small batch size / 3 epochs — sized for the ~100-row training CSV.
training_args = Seq2SeqTrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    logging_dir='./logs',
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    evaluation_strategy="epoch",  # NOTE(review): renamed to eval_strategy in newer transformers — verify pin
    predict_with_generate=True,
    generation_max_length=100,
)
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)

trainer.train()

# Persist model + tokenizer to the directory the backend loads from.
trainer.save_model("fine-tuned-bart-large")
tokenizer.save_pretrained("fine-tuned-bart-large")

print("Model and tokenizer fine-tuned and saved as 'fine-tuned-bart-large'")
|