Priyansh Saxena commited on
Commit
9439512
·
0 Parent(s):

deploy: production backend code for HF Space

Browse files
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ __pycache__
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# Install dependencies before copying the source tree so code-only edits
# do not invalidate the (slow) pip layer.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

COPY . .

# Writable dirs for uploads, generated charts, and the HF model cache.
# The cache dirs referenced by TRANSFORMERS_CACHE/HF_HOME below must exist
# and be writable: HF Spaces may run the container as an arbitrary
# non-root user.
RUN mkdir -p /app/data/uploads /app/static/images /app/.cache/huggingface/transformers && \
    chmod -R 777 /app/data /app/static /app/.cache

ENV TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers
ENV HF_HOME=/app/.cache/huggingface

# HF Spaces routes traffic to port 7860.
EXPOSE 7860

CMD ["python", "app.py"]
__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # backend/__init__.py
app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from flask import Flask, request, jsonify, send_from_directory
from flask_cors import CORS
from llm_agent import LLM_Agent
from data_processor import DataProcessor
import os
import logging
import time
from dotenv import load_dotenv
from werkzeug.utils import secure_filename

# Pull API keys / config from a local .env file (no-op if the file is absent).
load_dotenv()

logging.basicConfig(level=logging.INFO)
# Silence chatty third-party loggers so the app's own logs stay readable.
logging.getLogger('matplotlib').setLevel(logging.WARNING)
logging.getLogger('PIL').setLevel(logging.WARNING)
logging.getLogger('plotly').setLevel(logging.WARNING)

BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# static_folder holds the generated chart PNGs (../static relative to this file).
app = Flask(__name__, static_folder=os.path.join(BASE_DIR, '..', 'static'))

# Only the deployed frontend and local dev servers may call the API.
CORS(app, origins=[
    "https://llm-integrated-excel-plotter-app.vercel.app",
    "http://localhost:8080",
    "http://localhost:3000",
], supports_credentials=False)

# Single shared agent instance; presumably holds the loaded model(s) —
# NOTE(review): confirm LLM_Agent is safe for concurrent requests.
agent = LLM_Agent()

UPLOAD_FOLDER = os.path.join(BASE_DIR, '..', 'data', 'uploads')
ALLOWED_EXTENSIONS = {'csv', 'xls', 'xlsx'}
MAX_UPLOAD_BYTES = 10 * 1024 * 1024  # 10 MB

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# Flask rejects larger request bodies with a 413 (handled below).
app.config['MAX_CONTENT_LENGTH'] = MAX_UPLOAD_BYTES

os.makedirs(UPLOAD_FOLDER, exist_ok=True)
40
def allowed_file(filename):
    """Return True when *filename* carries an upload-approved extension."""
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS
42
+
43
+
44
@app.route('/')
def index():
    """Health/landing endpoint listing the available API routes."""
    payload = {
        "status": "ok",
        "message": "AI Data Visualization API",
        "endpoints": ["/plot", "/upload", "/stats", "/models"],
    }
    return jsonify(payload)
51
+
52
+
53
@app.route('/models', methods=['GET'])
def models():
    """List the selectable LLM backends and the default choice."""
    available = [
        {"id": "qwen", "name": "Qwen2.5-1.5B", "provider": "Local (transformers)", "free": True},
        {"id": "bart", "name": "BART (fine-tuned)", "provider": "Local (transformers)", "free": True},
        {"id": "gemini", "name": "Gemini 2.0 Flash", "provider": "Google AI (API key)", "free": False},
        {"id": "grok", "name": "Grok-3 Mini", "provider": "xAI (API key)", "free": False},
    ]
    return jsonify({"models": available, "default": "qwen"})
64
+
65
+
66
@app.route('/plot', methods=['POST'])
def plot():
    """Generate a chart from a natural-language query.

    Expects a JSON body with "query" (required) and optionally "model";
    delegates the work to the LLM agent and returns its result as JSON.
    Returns 400 if the body is missing, unparseable, or lacks "query".
    """
    t0 = time.time()
    # silent=True: malformed JSON becomes None and gets our JSON 400 below,
    # instead of force=True alone raising Flask's HTML BadRequest page.
    data = request.get_json(force=True, silent=True)
    if not data or not data.get('query'):
        return jsonify({'error': 'Missing required field: query'}), 400

    logging.info(f"Plot request: model={data.get('model','qwen')} query={data.get('query')[:80]}")
    result = agent.process_request(data)
    logging.info(f"Plot completed in {time.time() - t0:.2f}s")
    return jsonify(result)
77
+
78
+
79
@app.route('/static/<path:filename>')
def serve_static(filename):
    """Serve generated chart images with permissive CORS and short caching."""
    response = send_from_directory(app.static_folder, filename)
    response.headers['Access-Control-Allow-Origin'] = '*'
    response.headers['Cache-Control'] = 'public, max-age=300'
    return response
85
+
86
+
87
@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept a CSV/XLS/XLSX upload, save it, and return a data summary.

    Responds 400 when the file part is missing, no file was selected, the
    extension is not allowed, or the file cannot be parsed as tabular data.
    On success returns the columns, dtypes, a 5-row preview, the stored
    path, and the row count.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part in request'}), 400
    file = request.files['file']
    if not file.filename:
        return jsonify({'error': 'No file selected'}), 400
    if not allowed_file(file.filename):
        return jsonify({'error': 'File type not allowed. Use CSV, XLS, or XLSX'}), 400

    filename = secure_filename(file.filename)
    # secure_filename() strips unsafe characters and can return an empty
    # string (e.g. names made only of separators or non-ASCII characters);
    # fall back to a generic name so we never save to the bare directory.
    if not filename:
        filename = 'upload.' + file.filename.rsplit('.', 1)[1].lower()
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(file_path)

    dp = DataProcessor(file_path)
    # load_data() yields a column-less empty frame when parsing fails;
    # surface that as a client error instead of a 200 with an empty schema.
    if dp.data.empty and len(dp.data.columns) == 0:
        return jsonify({'error': 'Could not parse the uploaded file'}), 400
    return jsonify({
        'message': 'File uploaded successfully',
        'columns': dp.get_columns(),
        'dtypes': dp.get_dtypes(),
        'preview': dp.preview(5),
        'file_path': file_path,
        'row_count': len(dp.data),
    })
110
+
111
+
112
@app.route('/stats', methods=['POST'])
def stats():
    """Return schema and summary statistics for a dataset.

    The JSON body may carry "file_path" pointing at a previously uploaded
    file; otherwise the agent's current dataset is used.
    """
    # silent=True: a missing or malformed body falls through to {} (matching
    # the `or {}` fallback) instead of force=True raising an HTML 400 page.
    data = request.get_json(force=True, silent=True) or {}
    file_path = data.get('file_path')
    dp = DataProcessor(file_path) if file_path and os.path.exists(file_path) else agent.data_processor
    return jsonify({
        'columns': dp.get_columns(),
        'dtypes': dp.get_dtypes(),
        'stats': dp.get_stats(),
        'row_count': len(dp.data),
    })
123
+
124
+
125
@app.errorhandler(413)
def file_too_large(e):
    """Translate Flask's 413 (body over MAX_CONTENT_LENGTH) into JSON."""
    limit_mb = MAX_UPLOAD_BYTES // (1024 * 1024)
    return jsonify({'error': f'File too large. Maximum size is {limit_mb} MB'}), 413
128
+
129
+
130
if __name__ == '__main__':
    # 0.0.0.0:7860 — the address/port HF Spaces expects the app to listen on.
    app.run(host='0.0.0.0', port=7860)
chart_generator.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import os
import time
import uuid

import matplotlib
# Headless backend: charts are rendered server-side with no display attached.
# Must be selected before pyplot is imported.
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go

logger = logging.getLogger(__name__)

# Shared dark-theme Plotly layout merged into every generated chart spec.
_PLOTLY_LAYOUT = dict(
    font=dict(family="Inter, system-ui, sans-serif", size=13),
    plot_bgcolor="#0f1117",
    paper_bgcolor="#0f1117",
    font_color="#e2e8f0",
    margin=dict(l=60, r=30, t=60, b=60),
    legend=dict(bgcolor="rgba(0,0,0,0)", borderwidth=0),
    xaxis=dict(gridcolor="#1e2d3d", linecolor="#2d3748", zerolinecolor="#2d3748"),
    yaxis=dict(gridcolor="#1e2d3d", linecolor="#2d3748", zerolinecolor="#2d3748"),
    colorway=["#4f8cff", "#34d399", "#f59e0b", "#ef4444", "#a78bfa", "#06b6d4"],
)
25
+
26
+
27
class ChartGenerator:
    """Render a chart twice from the same arguments: a static PNG via
    matplotlib and an interactive Plotly JSON spec for the frontend."""

    def __init__(self, data=None):
        """Use *data* (a non-empty DataFrame) or fall back to the bundled sample CSV."""
        logger.info("Initializing ChartGenerator")
        if data is not None and not (isinstance(data, pd.DataFrame) and data.empty):
            self.data = data
        else:
            default_csv = os.path.join(
                os.path.dirname(__file__), "data", "sample_data.csv"
            )
            self.data = pd.read_csv(default_csv) if os.path.exists(default_csv) else pd.DataFrame()

    # -----------------------------------------------------------------------
    # Public
    # -----------------------------------------------------------------------

    def generate_chart(self, plot_args: dict) -> dict:
        """Return {"chart_path": str, "chart_spec": dict}.

        *plot_args* requires "x" (column name) and "y" (list of column
        names); "chart_type" (default "line") and "color" are optional.
        Raises ValueError if any referenced column is missing.
        """
        t0 = time.time()
        logger.info(f"Generating chart: {plot_args}")

        x_col = plot_args["x"]
        y_cols = plot_args["y"]
        chart_type = plot_args.get("chart_type", "line")
        color = plot_args.get("color", None)

        self._validate_columns(x_col, y_cols)

        chart_path = self._save_matplotlib(x_col, y_cols, chart_type, color)
        chart_spec = self._build_plotly_spec(x_col, y_cols, chart_type, color)

        logger.info(f"Chart ready in {time.time() - t0:.2f}s")
        return {"chart_path": chart_path, "chart_spec": chart_spec}

    # -----------------------------------------------------------------------
    # Validation
    # -----------------------------------------------------------------------

    def _validate_columns(self, x_col: str, y_cols: list):
        """Raise ValueError listing any requested columns absent from the data."""
        missing = [c for c in [x_col] + y_cols if c not in self.data.columns]
        if missing:
            raise ValueError(
                f"Columns not found in data: {missing}. "
                f"Available: {list(self.data.columns)}"
            )

    # -----------------------------------------------------------------------
    # Matplotlib (static PNG)
    # -----------------------------------------------------------------------

    def _save_matplotlib(self, x_col, y_cols, chart_type, color) -> str:
        """Render the chart to a PNG under static/images; return its relative path."""
        plt.clf()
        plt.close("all")
        fig, ax = plt.subplots(figsize=(10, 6))
        fig.patch.set_facecolor("#0f1117")
        ax.set_facecolor("#0f1117")

        palette = ["#4f8cff", "#34d399", "#f59e0b", "#ef4444", "#a78bfa"]
        x = self.data[x_col]

        for i, y_col in enumerate(y_cols):
            # An explicit user color overrides the per-series palette cycle.
            c = color or palette[i % len(palette)]
            y = self.data[y_col]
            if chart_type == "bar":
                ax.bar(x, y, label=y_col, color=c, alpha=0.85)
            elif chart_type == "scatter":
                ax.scatter(x, y, label=y_col, color=c, alpha=0.8)
            elif chart_type == "area":
                ax.fill_between(x, y, label=y_col, color=c, alpha=0.4)
                ax.plot(x, y, color=c)
            elif chart_type == "histogram":
                ax.hist(y, label=y_col, color=c, alpha=0.8, bins="auto", edgecolor="#1e2d3d")
            elif chart_type == "box":
                # All y-series are drawn in one boxplot call, so stop after it.
                ax.boxplot(
                    [self.data[y_col].dropna().values for y_col in y_cols],
                    labels=y_cols,
                    patch_artist=True,
                    boxprops=dict(facecolor=c, color="#e2e8f0"),
                    medianprops=dict(color="#f59e0b", linewidth=2),
                )
                break
            elif chart_type == "pie":
                # A pie uses only the first y-series; stop after drawing it.
                ax.pie(
                    y, labels=x, autopct="%1.1f%%",
                    colors=palette, startangle=90,
                    wedgeprops=dict(edgecolor="#0f1117"),
                )
                ax.set_aspect("equal")
                break
            else:
                # Default: line chart.
                ax.plot(x, y, label=y_col, color=c, marker="o", linewidth=2)

        for spine in ax.spines.values():
            spine.set_edgecolor("#2d3748")
        ax.tick_params(colors="#94a3b8")
        ax.xaxis.label.set_color("#94a3b8")
        ax.yaxis.label.set_color("#94a3b8")
        ax.set_xlabel(x_col, fontsize=11)
        ax.set_ylabel(" / ".join(y_cols), fontsize=11)
        ax.set_title(f"{chart_type.title()} \u2014 {', '.join(y_cols)} vs {x_col}",
                     color="#e2e8f0", fontsize=13, pad=12)
        ax.grid(True, alpha=0.15, color="#1e2d3d")
        if chart_type not in ("pie", "histogram"):
            ax.legend(facecolor="#161b27", edgecolor="#2d3748", labelcolor="#e2e8f0")
        if chart_type not in ("pie", "histogram", "box") and len(x) > 5:
            plt.xticks(rotation=45, ha="right")

        output_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "static", "images")
        os.makedirs(output_dir, exist_ok=True)
        # Random filename so concurrent requests never clobber each other.
        filename = f"chart_{uuid.uuid4().hex[:12]}.png"
        full_path = os.path.join(output_dir, filename)
        plt.savefig(full_path, dpi=150, bbox_inches="tight", facecolor=fig.get_facecolor())
        plt.close(fig)
        logger.info(f"Saved PNG: {full_path} ({os.path.getsize(full_path)} bytes)")
        return os.path.join("static", "images", filename)

    # -----------------------------------------------------------------------
    # Plotly (interactive JSON spec for frontend)
    # -----------------------------------------------------------------------

    def _build_plotly_spec(self, x_col, y_cols, chart_type, color) -> dict:
        """Build a {"data": [...], "layout": {...}} spec for plotly.js."""
        palette = ["#4f8cff", "#34d399", "#f59e0b", "#ef4444", "#a78bfa"]
        x = self.data[x_col].tolist()
        traces = []

        for i, y_col in enumerate(y_cols):
            c = color or palette[i % len(palette)]
            y = self.data[y_col].tolist()

            if chart_type == "bar":
                traces.append(go.Bar(x=x, y=y, name=y_col, marker_color=c, opacity=0.85).to_plotly_json())
            elif chart_type == "scatter":
                traces.append(go.Scatter(x=x, y=y, name=y_col, mode="markers",
                                         marker=dict(color=c, size=8, opacity=0.8)).to_plotly_json())
            elif chart_type == "area":
                traces.append(go.Scatter(x=x, y=y, name=y_col, mode="lines",
                                         fill="tozeroy", line=dict(color=c)).to_plotly_json())
            elif chart_type == "histogram":
                traces.append(go.Histogram(x=y, name=y_col, marker_color=c, opacity=0.8).to_plotly_json())
            elif chart_type == "box":
                traces.append(go.Box(y=y, name=y_col, marker_color=c,
                                     line_color="#e2e8f0", fillcolor=c).to_plotly_json())
            elif chart_type == "pie":
                traces.append(go.Pie(labels=x, values=y, name=y_col,
                                     marker=dict(colors=palette)).to_plotly_json())
                break
            else:  # line
                traces.append(go.Scatter(x=x, y=y, name=y_col, mode="lines+markers",
                                         line=dict(color=c, width=2),
                                         marker=dict(size=6)).to_plotly_json())

        # BUGFIX: the previous `dict(**_PLOTLY_LAYOUT, xaxis=..., yaxis=...)`
        # raised "TypeError: dict() got multiple values for keyword argument
        # 'xaxis'" on every call, because _PLOTLY_LAYOUT already has
        # xaxis/yaxis keys. Copy the base layout, then override per-chart
        # entries (copying the axis sub-dicts so the shared template is
        # never mutated).
        layout = dict(_PLOTLY_LAYOUT)
        layout["title"] = dict(
            text=f"{chart_type.title()} \u2014 {', '.join(y_cols)} vs {x_col}",
            font=dict(size=15, color="#e2e8f0"),
        )
        layout["xaxis"] = dict(_PLOTLY_LAYOUT["xaxis"], title=x_col)
        layout["yaxis"] = dict(_PLOTLY_LAYOUT["yaxis"], title=" / ".join(y_cols))

        return {"data": traces, "layout": layout}
data/readme ADDED
@@ -0,0 +1 @@
 
 
1
+
data/sample_data.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Year,Sales,Employee expense,Net profit,EBITDA,EBIT,RoCE,interest,WC %
2
+ 2020,1000,200,100,300,250,10,50,5
3
+ 2021,1200,220,150,350,300,12,55,6
4
+ 2022,1400,250,200,400,350,15,60,7
5
+ 2023,1600,270,250,450,400,18,65,8
data/train_data.csv ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ query,arguments
2
+ plot the sales in the years with red line,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line', 'color': 'red'}"
3
+ show employee expenses and net profit over the years,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
4
+ display the EBITDA for each year with a blue bar,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar', 'color': 'blue'}"
5
+ plot the RoCE over time,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line'}"
6
+ show the interest payments each year with a green bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'green'}"
7
+ display the working capital percentage over the years,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
8
+ plot the EBIT for each year with an orange line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'orange'}"
9
+ show sales and EBIT over the years,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
10
+ display the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
11
+ plot the employee expenses each year with a red line,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line', 'color': 'red'}"
12
+ show the annual sales in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
13
+ display EBIT and EBITDA over the years,"{'x': 'Year', 'y': ['EBIT', 'EBITDA'], 'chart_type': 'line'}"
14
+ plot the RoCE for each year with a purple line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'purple'}"
15
+ show the interest and working capital percentage,"{'x': 'Year', 'y': ['interest', 'WC %'], 'chart_type': 'line'}"
16
+ display the annual net profit with a blue bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'blue'}"
17
+ plot the sales and employee expenses in a line chart,"{'x': 'Year', 'y': ['Sales', 'Employee expense'], 'chart_type': 'line'}"
18
+ show the EBITDA for each year with a green bar,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar', 'color': 'green'}"
19
+ display the EBIT over time with an orange line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'orange'}"
20
+ plot the net profit each year with a red bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'red'}"
21
+ show the employee expenses in a line chart,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
22
+ display the annual interest payments with a blue line,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line', 'color': 'blue'}"
23
+ plot the RoCE and WC % over the years,"{'x': 'Year', 'y': ['RoCE', 'WC %'], 'chart_type': 'line'}"
24
+ show the sales each year with an orange bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'orange'}"
25
+ display EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
26
+ plot the employee expenses and EBIT,"{'x': 'Year', 'y': ['Employee expense', 'EBIT'], 'chart_type': 'line'}"
27
+ show the working capital percentage each year,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
28
+ display the RoCE in a bar chart,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'bar'}"
29
+ plot the annual sales with a green line,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line', 'color': 'green'}"
30
+ show the EBIT and interest over time,"{'x': 'Year', 'y': ['EBIT', 'interest'], 'chart_type': 'line'}"
31
+ display the net profit each year with a purple bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'purple'}"
32
+ plot the employee expenses over the years,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
33
+ show the EBITDA in a line chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
34
+ display EBIT and RoCE,"{'x': 'Year', 'y': ['EBIT', 'RoCE'], 'chart_type': 'line'}"
35
+ plot the sales and net profit each year,"{'x': 'Year', 'y': ['Sales', 'Net profit'], 'chart_type': 'line'}"
36
+ show the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
37
+ display the working capital percentage with a red bar,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'bar', 'color': 'red'}"
38
+ plot the RoCE for each year with a blue line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'blue'}"
39
+ show the sales over the years in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
40
+ display EBITDA and employee expenses,"{'x': 'Year', 'y': ['EBITDA', 'Employee expense'], 'chart_type': 'line'}"
41
+ plot the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
42
+ show the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
43
+ display the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
44
+ plot the EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
45
+ show the RoCE each year with an orange line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'orange'}"
46
+ display the sales and EBIT over time,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
47
+ plot the working capital percentage annually,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
48
+ show the interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
49
+ display the annual net profit,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
50
+ plot the employee expenses each year,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
51
+ show the EBITDA in a bar chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar'}"
52
+ display the EBIT with a red line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'red'}"
53
+ plot the sales each year with an orange bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'orange'}"
54
+ show the employee expenses and net profit,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
55
+ display the RoCE and working capital percentage,"{'x': 'Year', 'y': ['RoCE', 'WC %'], 'chart_type': 'line'}"
56
+ plot the EBITDA and EBIT,"{'x': 'Year', 'y': ['EBITDA', 'EBIT'], 'chart_type': 'line'}"
57
+ show the sales in a line chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}"
58
+ display the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
59
+ plot the annual interest payments,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
60
+ show the RoCE over time,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line'}"
61
+ display the working capital percentage,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
62
+ plot the EBITDA each year,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
63
+ show the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
64
+ display the EBIT and RoCE,"{'x': 'Year', 'y': ['EBIT', 'RoCE'], 'chart_type': 'line'}"
65
+ plot the sales and net profit each year,"{'x': 'Year', 'y': ['Sales', 'Net profit'], 'chart_type': 'line'}"
66
+ show the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
67
+ display the working capital percentage with a red bar,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'bar', 'color': 'red'}"
68
+ plot the RoCE for each year with a blue line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'blue'}"
69
+ show the sales over the years in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
70
+ display EBITDA and employee expenses,"{'x': 'Year', 'y': ['EBITDA', 'Employee expense'], 'chart_type': 'line'}"
71
+ plot the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
72
+ show the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
73
+ display the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
74
+ plot the EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
75
+ show the RoCE each year with an orange line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'orange'}"
76
+ display the sales and EBIT over time,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
77
+ plot the working capital percentage annually,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
78
+ show the interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
79
+ display the annual net profit,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
80
+ plot the EBIT and sales over time,"{'x': 'Year', 'y': ['EBIT', 'Sales'], 'chart_type': 'line'}"
81
+ show the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
82
+ display the employee expenses and EBITDA over the years,"{'x': 'Year', 'y': ['Employee expense', 'EBITDA'], 'chart_type': 'line'}"
83
+ plot the RoCE for each year with a red line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'red'}"
84
+ show the interest payments each year,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
85
+ display the working capital percentage over time,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
86
+ plot the EBIT for each year with a blue line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'blue'}"
87
+ show sales and EBITDA over time,"{'x': 'Year', 'y': ['Sales', 'EBITDA'], 'chart_type': 'line'}"
88
+ display the net profit and RoCE,"{'x': 'Year', 'y': ['Net profit', 'RoCE'], 'chart_type': 'line'}"
89
+ plot the employee expenses and net profit each year,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
90
+ show the EBITDA in a line chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
91
+ display the sales and EBIT over the years,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
92
+ plot the working capital percentage with a red line,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line', 'color': 'red'}"
93
+ show the annual interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
94
+ display the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
95
+ plot the sales each year with a green bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'green'}"
96
+ show the employee expenses and net profit over time,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
97
+ display the RoCE and EBITDA,"{'x': 'Year', 'y': ['RoCE', 'EBITDA'], 'chart_type': 'line'}"
98
+ plot the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
99
+ show the EBIT and sales each year,"{'x': 'Year', 'y': ['EBIT', 'Sales'], 'chart_type': 'line'}"
100
+ display the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
data_processor.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import os
3
+ import logging
4
+
5
class DataProcessor:
    """Load a tabular data file (CSV/XLS/XLSX) and expose schema, preview,
    and summary-statistics helpers used by the API layer."""

    def __init__(self, data_path=None):
        logging.info("Initializing DataProcessor")
        # Allow a dynamic data path (for user uploads); fall back to the
        # bundled sample when the path is missing or does not exist.
        if data_path and os.path.exists(data_path):
            self.data_path = data_path
        else:
            self.data_path = os.path.join(os.path.dirname(__file__), 'data', 'sample_data.csv')
        self.data = self.load_data(self.data_path)

    def load_data(self, path):
        """Read the file at *path*; return an empty DataFrame on any failure.

        Callers (e.g. the /upload route) detect failure via the empty,
        column-less frame rather than an exception.
        """
        ext = os.path.splitext(path)[1].lower()
        try:
            if ext == '.csv':
                data = pd.read_csv(path)
            elif ext in ['.xls', '.xlsx']:
                data = pd.read_excel(path)
            else:
                raise ValueError(f"Unsupported file type: {ext}")
            logging.info(f"Loaded data from {path} with shape {data.shape}")
            return data
        except Exception as e:
            logging.error(f"Failed to load data: {e}")
            return pd.DataFrame()

    def validate_columns(self, required_columns):
        """Return (True, []) if all *required_columns* exist, else (False, missing)."""
        missing = [col for col in required_columns if col not in self.data.columns]
        if missing:
            logging.warning(f"Missing columns: {missing}")
            return False, missing
        return True, []

    def get_columns(self):
        """Return the column names as a plain list."""
        return list(self.data.columns)

    def preview(self, n=5):
        """Return the first *n* rows as a list of record dicts."""
        return self.data.head(n).to_dict(orient='records')

    def get_dtypes(self) -> dict:
        """Map each column name to a coarse, frontend-friendly type name."""
        result = {}
        for col, dtype in self.data.dtypes.items():
            if pd.api.types.is_integer_dtype(dtype):
                result[col] = "integer"
            elif pd.api.types.is_float_dtype(dtype):
                result[col] = "float"
            elif pd.api.types.is_datetime64_any_dtype(dtype):
                result[col] = "datetime"
            elif pd.api.types.is_bool_dtype(dtype):
                result[col] = "boolean"
            else:
                result[col] = "string"
        return result

    def get_stats(self) -> dict:
        """Return describe() for numeric columns, rounded to 4 decimals.

        describe() can produce NaN (e.g. std of a single row); NaN is not
        valid JSON, so it is mapped to None before the dict reaches the
        API response.
        """
        numeric = self.data.select_dtypes(include='number')
        if numeric.empty:
            return {}
        desc = numeric.describe().to_dict()
        return {
            col: {
                k: (None if pd.isna(v) else round(float(v), 4))
                for k, v in col_stats.items()
            }
            for col, col_stats in desc.items()
        }
64
+
deploy_backend.ps1 ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
$ErrorActionPreference = 'Stop'
# Stage removal of every tracked file outside backend/ so the Space
# deployment ships only the backend code.
$files = git ls-files
foreach ($f in $files) {
    if ($f -notlike 'backend/*' -and $f -ne 'backend') {
        git rm -q --ignore-unmatch -- "$f"
    }
}
Write-Output "-- staged removals --"
git status --porcelain=2 --branch
$staged = git diff --staged --name-only
if (-not $staged) {
    # Nothing was removed: commit anyway so the deploy step always has a
    # fresh ref to push.
    git commit --allow-empty -m 'Deploy: keep only backend (empty commit if no removals)'
} else {
    git commit -m 'Deploy: keep only backend'
}
Write-Output "-- commit done --"
fine-tuned-bart-large/README.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ library_name: transformers
4
+ language: en
5
+ datasets:
6
+ - your-dataset-name
7
+ metrics:
8
+ - rouge
9
+ base_model: facebook/bart-large
10
+ tags:
11
+ - text2text-generation
12
+ - summarization
13
+ - fine-tuned
14
+ pipeline_tag: summarization
15
+ model-index:
16
+ - name: fine-tuned-bart-large
17
+ results:
18
+ - task: summarization
19
+ dataset: your-dataset-name
20
+ metrics:
21
+ - rouge1: 0.45
22
+ - rouge2: 0.22
23
+ - rougel: 0.40
24
+ ---
25
+
26
+ # Fine-tuned BART Large Model
27
+
28
+ This repository contains a fine-tuned BART large model for text summarization tasks.
29
+
30
+ ## Model Details
31
+
32
+ - Base model: facebook/bart-large
33
+ - Fine-tuned on: your-dataset-name
34
+ - License: MIT
35
+
36
+ ## Usage
37
+
38
+ You can load this model using the Hugging Face Transformers library:
39
+
40
+ ```python
41
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
42
+
43
+ model_name = "ArchCoder/fine-tuned-bart-large"
44
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
45
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
46
+ ```
47
+
48
+ Replace `"ArchCoder/fine-tuned-bart-large"` with your actual model repo name.
49
+
50
+ ## Evaluation
51
+
52
+ The model was evaluated on the your-dataset-name dataset with the following metrics:
53
+
54
+ - ROUGE-1: 0.45
55
+ - ROUGE-2: 0.22
56
+ - ROUGE-L: 0.40
57
+
58
+ ## License
59
+
60
+ This model is licensed under the MIT License.
fine-tuned-bart-large/config.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/bart-large",
3
+ "activation_dropout": 0.1,
4
+ "activation_function": "gelu",
5
+ "add_bias_logits": false,
6
+ "add_final_layer_norm": false,
7
+ "architectures": [
8
+ "BartForConditionalGeneration"
9
+ ],
10
+ "attention_dropout": 0.1,
11
+ "bos_token_id": 0,
12
+ "classif_dropout": 0.1,
13
+ "classifier_dropout": 0.0,
14
+ "d_model": 1024,
15
+ "decoder_attention_heads": 16,
16
+ "decoder_ffn_dim": 4096,
17
+ "decoder_layerdrop": 0.0,
18
+ "decoder_layers": 12,
19
+ "decoder_start_token_id": 2,
20
+ "dropout": 0.1,
21
+ "early_stopping": true,
22
+ "encoder_attention_heads": 16,
23
+ "encoder_ffn_dim": 4096,
24
+ "encoder_layerdrop": 0.0,
25
+ "encoder_layers": 12,
26
+ "eos_token_id": 2,
27
+ "forced_bos_token_id": 0,
28
+ "forced_eos_token_id": 2,
29
+ "gradient_checkpointing": false,
30
+ "id2label": {
31
+ "0": "LABEL_0",
32
+ "1": "LABEL_1",
33
+ "2": "LABEL_2"
34
+ },
35
+ "init_std": 0.02,
36
+ "is_encoder_decoder": true,
37
+ "label2id": {
38
+ "LABEL_0": 0,
39
+ "LABEL_1": 1,
40
+ "LABEL_2": 2
41
+ },
42
+ "max_position_embeddings": 1024,
43
+ "model_type": "bart",
44
+ "no_repeat_ngram_size": 3,
45
+ "normalize_before": false,
46
+ "num_beams": 4,
47
+ "num_hidden_layers": 12,
48
+ "pad_token_id": 1,
49
+ "scale_embedding": false,
50
+ "task_specific_params": {
51
+ "summarization": {
52
+ "length_penalty": 1.0,
53
+ "max_length": 128,
54
+ "min_length": 12,
55
+ "num_beams": 4
56
+ },
57
+ "summarization_cnn": {
58
+ "length_penalty": 2.0,
59
+ "max_length": 142,
60
+ "min_length": 56,
61
+ "num_beams": 4
62
+ },
63
+ "summarization_xsum": {
64
+ "length_penalty": 1.0,
65
+ "max_length": 62,
66
+ "min_length": 11,
67
+ "num_beams": 6
68
+ }
69
+ },
70
+ "torch_dtype": "float32",
71
+ "transformers_version": "4.42.3",
72
+ "use_cache": true,
73
+ "vocab_size": 50265
74
+ }
fine-tuned-bart-large/generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "decoder_start_token_id": 2,
5
+ "early_stopping": true,
6
+ "eos_token_id": 2,
7
+ "forced_bos_token_id": 0,
8
+ "forced_eos_token_id": 2,
9
+ "no_repeat_ngram_size": 3,
10
+ "num_beams": 4,
11
+ "pad_token_id": 1,
12
+ "transformers_version": "4.42.3"
13
+ }
fine-tuned-bart-large/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
fine-tuned-bart-large/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
fine-tuned-bart-large/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
fine-tuned-bart-large/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "mask_token": "<mask>",
51
+ "model_max_length": 1024,
52
+ "pad_token": "<pad>",
53
+ "sep_token": "</s>",
54
+ "tokenizer_class": "BartTokenizer",
55
+ "trim_offsets": true,
56
+ "unk_token": "<unk>"
57
+ }
fine-tuned-bart-large/training_args.bin ADDED
Binary file (5.24 kB). View file
 
fine-tuned-bart-large/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
image_verifier.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+ import torch
3
+ from transformers import CLIPProcessor, CLIPModel
4
+ import os
5
+ import logging
6
+ import time
7
+ from dotenv import load_dotenv
8
+
9
+ load_dotenv()
10
+
11
class ImageVerifier:
    """Scores a generated chart image against a text query using CLIP.

    NOTE(review): with a single candidate text, ``logits_per_image`` is a
    1x1 matrix, so ``softmax(dim=1)`` always yields probability 1.0 and
    ``argmax() == 0`` is always True — ``verify`` can never fail as written.
    A meaningful check needs multiple candidate texts or a threshold on the
    raw logit; the decision rule is left unchanged here to preserve the
    existing API behavior.
    """

    def __init__(self):
        logging.info("Initializing ImageVerifier")
        # Downloads/loads CLIP ViT-B/32 once at construction time.
        self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
        self.model.eval()  # inference only; disables dropout etc.

    def verify(self, image_path, query):
        """Return True when the image at *image_path* matches *query*.

        Args:
            image_path: Path relative to the project root (one directory
                above this file).
            query: Natural-language description to score the image against.
        """
        start_time = time.time()
        logging.info(f"Verifying image {image_path} with query: {query}")

        full_image_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), image_path)

        # Context manager so the underlying file descriptor is released
        # promptly; bare Image.open keeps it open until GC.
        with Image.open(full_image_path) as image:
            inputs = self.processor(text=[query], images=image, return_tensors="pt", padding=True)

        # Inference only — no_grad avoids building an autograd graph.
        with torch.no_grad():
            outputs = self.model(**inputs)
        logits_per_image = outputs.logits_per_image
        probs = logits_per_image.softmax(dim=1)

        # See class NOTE: with one text this is always True.
        verification_result = probs.argmax().item() == 0
        end_time = time.time()

        logging.info(f"Image verification result: {verification_result} in {end_time - start_time} seconds")
        return verification_result
llm_agent.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ import json
3
+ import logging
4
+ import os
5
+ import time
6
+
7
+ from dotenv import load_dotenv
8
+
9
+ from chart_generator import ChartGenerator
10
+ from data_processor import DataProcessor
11
+
12
+ load_dotenv()
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # ---------------------------------------------------------------------------
17
+ # Prompt templates
18
+ # ---------------------------------------------------------------------------
19
+
20
# System prompt shared by the chat-model backends (Qwen/Gemini/Grok).
# It constrains the model to emit a bare JSON object whose shape matches
# what _parse_output and _validate below expect.
_SYSTEM_PROMPT = (
    "You are a data visualization expert. "
    "Given the user request and the dataset schema provided, output ONLY a valid JSON "
    "object — no explanation, no markdown fences, no extra text.\n\n"
    "Required keys:\n"
    ' "x" : string — exact column name for the x-axis\n'
    ' "y" : array — one or more exact column names for the y-axis\n'
    ' "chart_type" : string — one of: line, bar, scatter, pie, histogram, box, area\n'
    ' "color" : string — optional CSS color, e.g. "red", "#4f8cff"\n\n'
    "Rules:\n"
    "- Use only column names that appear in the schema. Never invent names.\n"
    "- For pie: y must contain exactly one column.\n"
    "- For histogram/box: x may equal the first element of y.\n"
    "- Default to line if chart type is ambiguous."
)
35
+
36
+
37
+ def _user_message(query: str, columns: list, dtypes: dict, sample_rows: list) -> str:
38
+ schema = "\n".join(f" - {c} ({dtypes.get(c, 'unknown')})" for c in columns)
39
+ samples = "".join(f" {json.dumps(r)}\n" for r in sample_rows[:3])
40
+ return (
41
+ f"Dataset columns:\n{schema}\n\n"
42
+ f"Sample rows (first 3):\n{samples}\n"
43
+ f"User request: {query}"
44
+ )
45
+
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # Output parsing & validation
49
+ # ---------------------------------------------------------------------------
50
+
51
+ def _parse_output(text: str):
52
+ text = text.strip()
53
+ if "```" in text:
54
+ for part in text.split("```"):
55
+ part = part.strip().lstrip("json").strip()
56
+ if part.startswith("{"):
57
+ text = part
58
+ break
59
+ try:
60
+ return json.loads(text)
61
+ except json.JSONDecodeError:
62
+ pass
63
+ try:
64
+ return ast.literal_eval(text)
65
+ except (SyntaxError, ValueError):
66
+ pass
67
+ return None
68
+
69
+
70
+ def _validate(args: dict, columns: list):
71
+ if not isinstance(args, dict):
72
+ return None
73
+ if not all(k in args for k in ("x", "y", "chart_type")):
74
+ return None
75
+ if isinstance(args["y"], str):
76
+ args["y"] = [args["y"]]
77
+ valid = {"line", "bar", "scatter", "pie", "histogram", "box", "area"}
78
+ if args["chart_type"] not in valid:
79
+ args["chart_type"] = "line"
80
+ if args["x"] not in columns:
81
+ return None
82
+ if not all(c in columns for c in args["y"]):
83
+ return None
84
+ return args
85
+
86
+
87
+ # ---------------------------------------------------------------------------
88
+ # Agent
89
+ # ---------------------------------------------------------------------------
90
+
91
class LLM_Agent:
    """Turns a natural-language query into validated chart arguments via an
    LLM backend, then renders the chart with ChartGenerator.

    Local models (Qwen, BART) are loaded lazily on first use and cached on
    the instance; Gemini and Grok are called over their remote APIs.
    """

    def __init__(self, data_path=None):
        logger.info("Initializing LLM_Agent")
        self.data_processor = DataProcessor(data_path)
        self.chart_generator = ChartGenerator(self.data_processor.data)
        # Lazily-populated model/tokenizer caches (None until first request).
        self._bart_tokenizer = None
        self._bart_model = None
        self._qwen_tokenizer = None
        self._qwen_model = None

    # -- model runners -------------------------------------------------------

    def _run_qwen(self, user_msg: str) -> str:
        """Generate raw text with the local Qwen2.5 instruct model.

        Loads model/tokenizer on first call and caches them on the instance.
        """
        if self._qwen_model is None:
            # Deferred import: transformers is heavy, only load when needed.
            from transformers import AutoModelForCausalLM, AutoTokenizer
            model_id = "Qwen/Qwen2.5-1.5B-Instruct"
            logger.info("Loading Qwen model (first request)...")
            self._qwen_tokenizer = AutoTokenizer.from_pretrained(model_id)
            self._qwen_model = AutoModelForCausalLM.from_pretrained(model_id)
            logger.info("Qwen model loaded.")
        messages = [
            {"role": "system", "content": _SYSTEM_PROMPT},
            {"role": "user", "content": user_msg},
        ]
        text = self._qwen_tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        inputs = self._qwen_tokenizer(text, return_tensors="pt")
        outputs = self._qwen_model.generate(
            **inputs, max_new_tokens=256, temperature=0.1, do_sample=True
        )
        # Slice off the prompt tokens so only the generated text is decoded.
        return self._qwen_tokenizer.decode(
            outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True
        )

    def _run_gemini(self, user_msg: str) -> str:
        """Generate raw text via the Gemini API (requires GEMINI_API_KEY)."""
        import google.generativeai as genai
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("GEMINI_API_KEY is not set")
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(
            "gemini-2.0-flash",
            system_instruction=_SYSTEM_PROMPT,
        )
        return model.generate_content(user_msg).text

    def _run_grok(self, user_msg: str) -> str:
        """Generate raw text via xAI's Grok (OpenAI-compatible endpoint;
        requires GROK_API_KEY)."""
        from openai import OpenAI
        api_key = os.getenv("GROK_API_KEY")
        if not api_key:
            raise ValueError("GROK_API_KEY is not set")
        client = OpenAI(api_key=api_key, base_url="https://api.x.ai/v1")
        resp = client.chat.completions.create(
            model="grok-3-mini",
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
            ],
            max_tokens=256,
            temperature=0.1,
        )
        return resp.choices[0].message.content

    def _run_bart(self, query: str) -> str:
        """Generate raw text with the fine-tuned seq2seq BART model.

        Unlike the chat backends, BART receives the bare query (it was
        fine-tuned on raw queries, not the schema-annotated prompt).
        Loads model/tokenizer lazily on first call.
        """
        if self._bart_model is None:
            from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
            model_id = "ArchCoder/fine-tuned-bart-large"
            logger.info("Loading BART model (first request)...")
            self._bart_tokenizer = AutoTokenizer.from_pretrained(model_id)
            self._bart_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
            logger.info("BART model loaded.")
        inputs = self._bart_tokenizer(
            query, return_tensors="pt", max_length=512, truncation=True
        )
        outputs = self._bart_model.generate(**inputs, max_length=100)
        return self._bart_tokenizer.decode(outputs[0], skip_special_tokens=True)

    # -- main entry point ----------------------------------------------------

    def process_request(self, data: dict) -> dict:
        """Handle one plot request end to end.

        *data* keys: "query" (NL request), optional "file_path" (dataset to
        load before plotting), optional "model" ("qwen" | "gemini" | "grok"
        | "bart"; defaults to "qwen").

        Returns a dict with "response", "chart_path", "chart_spec",
        "verified", and "plot_args". Any LLM or parse failure falls back to
        default plot arguments rather than raising.
        """
        t0 = time.time()
        query = data.get("query", "")
        data_path = data.get("file_path")
        model = data.get("model", "qwen")

        # Swap in a new dataset (and matching chart generator) if provided.
        if data_path and os.path.exists(data_path):
            self.data_processor = DataProcessor(data_path)
            self.chart_generator = ChartGenerator(self.data_processor.data)

        columns = self.data_processor.get_columns()
        dtypes = self.data_processor.get_dtypes()
        sample_rows = self.data_processor.preview(3)

        # Used when the LLM output is missing or fails validation.
        default_args = {
            "x": columns[0] if columns else "Year",
            "y": [columns[1]] if len(columns) > 1 else ["Sales"],
            "chart_type": "line",
        }

        raw_text = ""
        plot_args = None
        try:
            user_msg = _user_message(query, columns, dtypes, sample_rows)
            # BART gets the raw query; chat models get the annotated prompt.
            if model == "gemini": raw_text = self._run_gemini(user_msg)
            elif model == "grok": raw_text = self._run_grok(user_msg)
            elif model == "bart": raw_text = self._run_bart(query)
            else: raw_text = self._run_qwen(user_msg)

            logger.info(f"LLM [{model}] output: {raw_text}")
            parsed = _parse_output(raw_text)
            plot_args = _validate(parsed, columns) if parsed else None
        except Exception as exc:
            # Deliberate broad catch: any backend failure degrades to
            # default_args below instead of failing the request.
            logger.error(f"LLM error [{model}]: {exc}")
            raw_text = str(exc)

        if not plot_args:
            logger.warning("Falling back to default plot args")
            plot_args = default_args

        try:
            chart_result = self.chart_generator.generate_chart(plot_args)
            chart_path = chart_result["chart_path"]
            chart_spec = chart_result["chart_spec"]
        except Exception as exc:
            logger.error(f"Chart generation error: {exc}")
            return {
                "response": f"Chart generation failed: {exc}",
                "chart_path": "",
                "chart_spec": None,
                "verified": False,
                "plot_args": plot_args,
            }

        logger.info(f"Request processed in {time.time() - t0:.2f}s")
        return {
            "response": json.dumps(plot_args),
            "chart_path": chart_path,
            "chart_spec": chart_spec,
            "verified": True,
            "plot_args": plot_args,
        }
requirements.txt ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate
2
+ aiohttp
3
+ aiosignal
4
+ attrs
5
+ blinker
6
+ certifi
7
+ charset-normalizer
8
+ click
9
+ colorama
10
+ coloredlogs
11
+ contourpy
12
+ cycler
13
+ datasets
14
+ dill
15
+ et-xmlfile
16
+ filelock
17
+ Flask
18
+ Flask-Cors
19
+ fonttools
20
+ frozenlist
21
+ fsspec
22
+ google-generativeai>=0.8.0
23
+ huggingface-hub>=0.23.0
24
+ humanfriendly
25
+ idna
26
+ intel-openmp
27
+ itsdangerous
28
+ Jinja2
29
+ joblib
30
+ kiwisolver
31
+ MarkupSafe
32
+ matplotlib
33
+ mkl
34
+ mpmath
35
+ multidict
36
+ multiprocess
37
+ networkx
38
+ numpy
39
+ openai>=1.0.0
40
+ openpyxl
41
+ optimum
42
+ packaging
43
+ pandas
44
+ pillow
45
+ plotly>=5.18.0
46
+ protobuf
47
+ psutil
48
+ pyarrow
49
+ pyarrow-hotfix
50
+ pyparsing
51
+ pyreadline3
52
+ python-dateutil
53
+ python-dotenv
54
+ pytz
55
+ PyYAML
56
+ regex
57
+ requests
58
+ safetensors
59
+ scikit-learn
60
+ scipy
61
+ sentencepiece
62
+ six
63
+ sympy
64
+ tbb
65
+ threadpoolctl
66
+ tokenizers
67
+ torch
68
+ torchvision
69
+ tqdm
70
+ transformers>=4.36.0
71
+ typing_extensions
72
+ tzdata
73
+ urllib3
74
+ Werkzeug
75
+ xxhash
76
+ yarl
start.sh ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Start script for backend Flask app on Hugging Face Spaces.
#
# The Docker image copies this repo into /app with app.py at the root
# (see Dockerfile: WORKDIR /app, COPY . ., CMD ["python", "app.py"]),
# so the entry point is ./app.py — not backend/app.py.

set -euo pipefail  # fail fast on errors and unset variables

export FLASK_APP=app.py
export FLASK_ENV=production

# Run the Flask app on 0.0.0.0:7860
python app.py
tracked_files.txt ADDED
Binary file (10 Bytes). View file
 
train_model.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Fine-tune facebook/bart-large to map NL plot queries to argument strings.

One-shot training script: reads data/train_data.csv (columns 'query' and
'arguments'), trains for 3 epochs, and saves model + tokenizer to
./fine-tuned-bart-large.
"""
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainer, Seq2SeqTrainingArguments
from sklearn.model_selection import train_test_split

# Expected CSV schema: 'query' (input text) -> 'arguments' (target text).
data = pd.read_csv('data/train_data.csv')
queries = data['query'].tolist()
arguments = data['arguments'].tolist()

# 80/20 train/eval split; fixed seed for reproducibility.
train_queries, eval_queries, train_arguments, eval_arguments = train_test_split(queries, arguments, test_size=0.2, random_state=42)

tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large")

train_encodings = tokenizer(train_queries, truncation=True, padding=True)
eval_encodings = tokenizer(eval_queries, truncation=True, padding=True)

# NOTE(review): as_target_tokenizer() is deprecated in recent transformers
# in favor of tokenizer(..., text_target=...) — confirm against the pinned
# transformers version before changing.
with tokenizer.as_target_tokenizer():
    train_labels = tokenizer(train_arguments, truncation=True, padding=True)
    eval_labels = tokenizer(eval_arguments, truncation=True, padding=True)

class PlotDataset(torch.utils.data.Dataset):
    """Wraps tokenized encodings and labels as a torch Dataset for Trainer."""
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        # Tensorize one example; 'labels' carries the target token ids.
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels['input_ids'][idx])
        return item

    def __len__(self):
        return len(self.encodings.input_ids)

train_dataset = PlotDataset(train_encodings, train_labels)
eval_dataset = PlotDataset(eval_encodings, eval_labels)

# Small batch size (2) to fit large BART on modest GPUs; eval each epoch.
# NOTE(review): evaluation_strategy was renamed to eval_strategy in newer
# transformers releases — keep in sync with the pinned version.
training_args = Seq2SeqTrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    logging_dir='./logs',
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,  # keep only the two most recent checkpoints
    evaluation_strategy="epoch",
    predict_with_generate=True,   # run generate() during eval
    generation_max_length=100,
)
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)

trainer.train()

# Persist both model weights and tokenizer files side by side so the
# directory can be loaded with from_pretrained() later.
trainer.save_model("fine-tuned-bart-large")
tokenizer.save_pretrained("fine-tuned-bart-large")

print("Model and tokenizer fine-tuned and saved as 'fine-tuned-bart-large'")