Spaces:
Sleeping
Sleeping
Priyansh Saxena committed on
Commit ·
9439512
0
Parent(s):
deploy: production backend code for HF Space
Browse files- .gitignore +2 -0
- Dockerfile +17 -0
- __init__.py +1 -0
- app.py +131 -0
- chart_generator.py +187 -0
- data/readme +1 -0
- data/sample_data.csv +5 -0
- data/train_data.csv +100 -0
- data_processor.py +64 -0
- deploy_backend.ps1 +16 -0
- fine-tuned-bart-large/README.md +60 -0
- fine-tuned-bart-large/config.json +74 -0
- fine-tuned-bart-large/generation_config.json +13 -0
- fine-tuned-bart-large/merges.txt +0 -0
- fine-tuned-bart-large/special_tokens_map.json +15 -0
- fine-tuned-bart-large/tokenizer.json +0 -0
- fine-tuned-bart-large/tokenizer_config.json +57 -0
- fine-tuned-bart-large/training_args.bin +0 -0
- fine-tuned-bart-large/vocab.json +0 -0
- image_verifier.py +34 -0
- llm_agent.py +232 -0
- requirements.txt +76 -0
- start.sh +8 -0
- tracked_files.txt +0 -0
- train_model.py +64 -0
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
__pycache__
|
Dockerfile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim

WORKDIR /app

# Install dependencies first so this (slow) layer is cached and only rebuilt
# when requirements.txt changes — the original copied the whole tree before
# installing, invalidating the pip layer on every source edit.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Application code (changes here no longer invalidate the dependency layer).
COPY . .

RUN mkdir -p /app/data/uploads /app/static/images

# Keep Hugging Face model caches inside /app where the container can write.
ENV TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers
ENV HF_HOME=/app/.cache/huggingface

# HF Spaces routes traffic to port 7860.
EXPOSE 7860

CMD ["python", "app.py"]
|
__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# backend/__init__.py
|
app.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from flask import Flask, request, jsonify, send_from_directory
from flask_cors import CORS
from llm_agent import LLM_Agent
from data_processor import DataProcessor
import os
import logging
import time
from dotenv import load_dotenv
from werkzeug.utils import secure_filename

# Pull API keys / settings from a local .env file (no-op when absent).
load_dotenv()

logging.basicConfig(level=logging.INFO)
# Third-party libs are chatty at INFO; keep the app log readable.
logging.getLogger('matplotlib').setLevel(logging.WARNING)
logging.getLogger('PIL').setLevel(logging.WARNING)
logging.getLogger('plotly').setLevel(logging.WARNING)

BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# Static assets (generated chart PNGs) live one level above this package.
app = Flask(__name__, static_folder=os.path.join(BASE_DIR, '..', 'static'))

# Only the deployed frontend and local dev servers may call the API.
CORS(app, origins=[
    "https://llm-integrated-excel-plotter-app.vercel.app",
    "http://localhost:8080",
    "http://localhost:3000",
], supports_credentials=False)

# Single shared agent instance; loads models once at startup.
agent = LLM_Agent()

UPLOAD_FOLDER = os.path.join(BASE_DIR, '..', 'data', 'uploads')
ALLOWED_EXTENSIONS = {'csv', 'xls', 'xlsx'}
MAX_UPLOAD_BYTES = 10 * 1024 * 1024 # 10 MB

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# Flask rejects larger request bodies with a 413 (handled below).
app.config['MAX_CONTENT_LENGTH'] = MAX_UPLOAD_BYTES

os.makedirs(UPLOAD_FOLDER, exist_ok=True)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def allowed_file(filename):
    """Return True when *filename* has an extension in ALLOWED_EXTENSIONS (case-insensitive)."""
    _, sep, extension = filename.rpartition('.')
    return sep == '.' and extension.lower() in ALLOWED_EXTENSIONS
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
@app.route('/')
def index():
    """Health/landing endpoint listing the available API routes."""
    payload = {
        "status": "ok",
        "message": "AI Data Visualization API",
        "endpoints": ["/plot", "/upload", "/stats", "/models"],
    }
    return jsonify(payload)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
@app.route('/models', methods=['GET'])
def models():
    """List the selectable LLM backends and the default choice."""
    catalog = [
        {"id": "qwen", "name": "Qwen2.5-1.5B", "provider": "Local (transformers)", "free": True},
        {"id": "bart", "name": "BART (fine-tuned)", "provider": "Local (transformers)", "free": True},
        {"id": "gemini", "name": "Gemini 2.0 Flash", "provider": "Google AI (API key)", "free": False},
        {"id": "grok", "name": "Grok-3 Mini", "provider": "xAI (API key)", "free": False},
    ]
    return jsonify({"models": catalog, "default": "qwen"})
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
@app.route('/plot', methods=['POST'])
def plot():
    """Generate a chart from a natural-language query.

    Expects JSON with at least "query"; "model" is optional (defaults to
    "qwen" inside the agent). Returns the agent's result payload, or a
    JSON error with status 400 (bad request) / 500 (agent failure).
    """
    t0 = time.time()
    # silent=True: malformed JSON yields None (-> 400) instead of an HTML 500.
    data = request.get_json(force=True, silent=True)
    if not data or not data.get('query'):
        return jsonify({'error': 'Missing required field: query'}), 400

    logging.info(f"Plot request: model={data.get('model','qwen')} query={data.get('query')[:80]}")
    try:
        result = agent.process_request(data)
    except Exception as e:
        # Surface agent failures as JSON so the frontend can display them,
        # instead of Flask's default HTML 500 page.
        logging.exception("Plot request failed")
        return jsonify({'error': str(e)}), 500
    logging.info(f"Plot completed in {time.time() - t0:.2f}s")
    return jsonify(result)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
@app.route('/static/<path:filename>')
def serve_static(filename):
    """Serve a generated chart asset with permissive CORS and short-lived caching."""
    response = send_from_directory(app.static_folder, filename)
    response.headers['Access-Control-Allow-Origin'] = '*'
    response.headers['Cache-Control'] = 'public, max-age=300'
    return response
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept a CSV/XLS/XLSX upload and return a schema preview.

    The file is saved under UPLOAD_FOLDER and loaded with DataProcessor;
    the response includes columns, dtypes, a 5-row preview, the stored
    path (used by later /plot and /stats calls), and the row count.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part in request'}), 400
    file = request.files['file']
    if not file.filename:
        return jsonify({'error': 'No file selected'}), 400
    if not allowed_file(file.filename):
        return jsonify({'error': 'File type not allowed. Use CSV, XLS, or XLSX'}), 400

    filename = secure_filename(file.filename)
    # secure_filename strips path separators and can reduce a hostile name
    # (e.g. "../../") to an empty string — reject instead of saving to the
    # upload directory itself.
    if not filename:
        return jsonify({'error': 'Invalid filename'}), 400
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(file_path)

    dp = DataProcessor(file_path)
    return jsonify({
        'message': 'File uploaded successfully',
        'columns': dp.get_columns(),
        'dtypes': dp.get_dtypes(),
        'preview': dp.preview(5),
        'file_path': file_path,
        'row_count': len(dp.data),
    })
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
@app.route('/stats', methods=['POST'])
def stats():
    """Return columns, dtypes, summary stats, and row count.

    Uses the file referenced by "file_path" in the JSON body when it
    exists; otherwise falls back to the agent's current dataset.
    """
    payload = request.get_json(force=True) or {}
    file_path = payload.get('file_path')
    if file_path and os.path.exists(file_path):
        dp = DataProcessor(file_path)
    else:
        dp = agent.data_processor
    return jsonify({
        'columns': dp.get_columns(),
        'dtypes': dp.get_dtypes(),
        'stats': dp.get_stats(),
        'row_count': len(dp.data),
    })
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
@app.errorhandler(413)
def file_too_large(e):
    """Translate Flask's 413 (body over MAX_CONTENT_LENGTH) into a JSON error."""
    limit_mb = MAX_UPLOAD_BYTES // (1024 * 1024)
    return jsonify({'error': f'File too large. Maximum size is {limit_mb} MB'}), 413
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
if __name__ == '__main__':
    # HF Spaces expects the app to listen on 0.0.0.0:7860.
    app.run(host='0.0.0.0', port=7860)
|
chart_generator.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
import os
import time
import uuid

import matplotlib
matplotlib.use("Agg")  # headless backend: render PNGs without a display; must precede pyplot import
import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go

logger = logging.getLogger(__name__)

# Shared dark-theme Plotly layout merged into every generated chart spec.
# NOTE(review): "font_color" is plotly.py magic-underscore shorthand; this dict
# is shipped raw to the frontend, so confirm plotly.js accepts it (vs. font.color).
_PLOTLY_LAYOUT = dict(
    font=dict(family="Inter, system-ui, sans-serif", size=13),
    plot_bgcolor="#0f1117",
    paper_bgcolor="#0f1117",
    font_color="#e2e8f0",
    margin=dict(l=60, r=30, t=60, b=60),
    legend=dict(bgcolor="rgba(0,0,0,0)", borderwidth=0),
    xaxis=dict(gridcolor="#1e2d3d", linecolor="#2d3748", zerolinecolor="#2d3748"),
    yaxis=dict(gridcolor="#1e2d3d", linecolor="#2d3748", zerolinecolor="#2d3748"),
    colorway=["#4f8cff", "#34d399", "#f59e0b", "#ef4444", "#a78bfa", "#06b6d4"],
)
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class ChartGenerator:
    """Renders a chart twice from the same arguments: a static PNG via
    matplotlib (saved under static/images) and an interactive Plotly
    spec (plain dict) for the frontend.

    Supported chart types: line (default), bar, scatter, area,
    histogram, box, pie.
    """

    def __init__(self, data=None):
        """Use *data* (a non-empty DataFrame) or fall back to the bundled sample CSV."""
        logger.info("Initializing ChartGenerator")
        if data is not None and not (isinstance(data, pd.DataFrame) and data.empty):
            self.data = data
        else:
            default_csv = os.path.join(
                os.path.dirname(__file__), "data", "sample_data.csv"
            )
            self.data = pd.read_csv(default_csv) if os.path.exists(default_csv) else pd.DataFrame()

    # -----------------------------------------------------------------------
    # Public
    # -----------------------------------------------------------------------

    def generate_chart(self, plot_args: dict) -> dict:
        """Return {"chart_path": str, "chart_spec": dict}.

        *plot_args* must contain "x" (column name) and "y" (list of column
        names); "chart_type" and "color" are optional.

        Raises ValueError when a requested column is missing, KeyError when
        "x" or "y" is absent from *plot_args*.
        """
        t0 = time.time()
        logger.info(f"Generating chart: {plot_args}")

        x_col = plot_args["x"]
        y_cols = plot_args["y"]
        chart_type = plot_args.get("chart_type", "line")
        color = plot_args.get("color", None)

        self._validate_columns(x_col, y_cols)

        chart_path = self._save_matplotlib(x_col, y_cols, chart_type, color)
        chart_spec = self._build_plotly_spec(x_col, y_cols, chart_type, color)

        logger.info(f"Chart ready in {time.time() - t0:.2f}s")
        return {"chart_path": chart_path, "chart_spec": chart_spec}

    # -----------------------------------------------------------------------
    # Validation
    # -----------------------------------------------------------------------

    def _validate_columns(self, x_col: str, y_cols: list):
        """Raise ValueError when any requested column is absent from the data."""
        missing = [c for c in [x_col] + y_cols if c not in self.data.columns]
        if missing:
            raise ValueError(
                f"Columns not found in data: {missing}. "
                f"Available: {list(self.data.columns)}"
            )

    # -----------------------------------------------------------------------
    # Matplotlib (static PNG)
    # -----------------------------------------------------------------------

    def _save_matplotlib(self, x_col, y_cols, chart_type, color) -> str:
        """Render the chart with matplotlib and save a PNG.

        Returns the saved path relative to the project root
        ("static/images/chart_<uuid>.png").
        """
        plt.clf()
        plt.close("all")  # defensive: drop any figures left over from earlier requests
        fig, ax = plt.subplots(figsize=(10, 6))
        fig.patch.set_facecolor("#0f1117")
        ax.set_facecolor("#0f1117")

        palette = ["#4f8cff", "#34d399", "#f59e0b", "#ef4444", "#a78bfa"]
        x = self.data[x_col]

        for i, y_col in enumerate(y_cols):
            # An explicit color (if given) applies to every series; otherwise
            # cycle through the palette.
            c = color or palette[i % len(palette)]
            y = self.data[y_col]
            if chart_type == "bar":
                ax.bar(x, y, label=y_col, color=c, alpha=0.85)
            elif chart_type == "scatter":
                ax.scatter(x, y, label=y_col, color=c, alpha=0.8)
            elif chart_type == "area":
                ax.fill_between(x, y, label=y_col, color=c, alpha=0.4)
                ax.plot(x, y, color=c)
            elif chart_type == "histogram":
                ax.hist(y, label=y_col, color=c, alpha=0.8, bins="auto", edgecolor="#1e2d3d")
            elif chart_type == "box":
                # A single boxplot call draws every y column at once, so stop looping.
                ax.boxplot(
                    [self.data[y_col].dropna().values for y_col in y_cols],
                    labels=y_cols,
                    patch_artist=True,
                    boxprops=dict(facecolor=c, color="#e2e8f0"),
                    medianprops=dict(color="#f59e0b", linewidth=2),
                )
                break
            elif chart_type == "pie":
                # Pie uses the x column as slice labels and only the first y column.
                ax.pie(
                    y, labels=x, autopct="%1.1f%%",
                    colors=palette, startangle=90,
                    wedgeprops=dict(edgecolor="#0f1117"),
                )
                ax.set_aspect("equal")
                break
            else:  # default: line
                ax.plot(x, y, label=y_col, color=c, marker="o", linewidth=2)

        for spine in ax.spines.values():
            spine.set_edgecolor("#2d3748")
        ax.tick_params(colors="#94a3b8")
        ax.xaxis.label.set_color("#94a3b8")
        ax.yaxis.label.set_color("#94a3b8")
        ax.set_xlabel(x_col, fontsize=11)
        ax.set_ylabel(" / ".join(y_cols), fontsize=11)
        ax.set_title(f"{chart_type.title()} \u2014 {', '.join(y_cols)} vs {x_col}",
                     color="#e2e8f0", fontsize=13, pad=12)
        ax.grid(True, alpha=0.15, color="#1e2d3d")
        if chart_type not in ("pie", "histogram"):
            ax.legend(facecolor="#161b27", edgecolor="#2d3748", labelcolor="#e2e8f0")
        if chart_type not in ("pie", "histogram", "box") and len(x) > 5:
            plt.xticks(rotation=45, ha="right")

        output_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "static", "images")
        os.makedirs(output_dir, exist_ok=True)
        # Unique name per render so concurrent requests never clobber each other.
        filename = f"chart_{uuid.uuid4().hex[:12]}.png"
        full_path = os.path.join(output_dir, filename)
        plt.savefig(full_path, dpi=150, bbox_inches="tight", facecolor=fig.get_facecolor())
        plt.close(fig)
        logger.info(f"Saved PNG: {full_path} ({os.path.getsize(full_path)} bytes)")
        return os.path.join("static", "images", filename)

    # -----------------------------------------------------------------------
    # Plotly (interactive JSON spec for frontend)
    # -----------------------------------------------------------------------

    def _build_plotly_spec(self, x_col, y_cols, chart_type, color) -> dict:
        """Return a plain {"data": [...], "layout": {...}} dict for plotly.js."""
        palette = ["#4f8cff", "#34d399", "#f59e0b", "#ef4444", "#a78bfa"]
        x = self.data[x_col].tolist()
        traces = []

        for i, y_col in enumerate(y_cols):
            c = color or palette[i % len(palette)]
            y = self.data[y_col].tolist()

            if chart_type == "bar":
                traces.append(go.Bar(x=x, y=y, name=y_col, marker_color=c, opacity=0.85).to_plotly_json())
            elif chart_type == "scatter":
                traces.append(go.Scatter(x=x, y=y, name=y_col, mode="markers",
                                         marker=dict(color=c, size=8, opacity=0.8)).to_plotly_json())
            elif chart_type == "area":
                traces.append(go.Scatter(x=x, y=y, name=y_col, mode="lines",
                                         fill="tozeroy", line=dict(color=c)).to_plotly_json())
            elif chart_type == "histogram":
                traces.append(go.Histogram(x=y, name=y_col, marker_color=c, opacity=0.8).to_plotly_json())
            elif chart_type == "box":
                traces.append(go.Box(y=y, name=y_col, marker_color=c,
                                     line_color="#e2e8f0", fillcolor=c).to_plotly_json())
            elif chart_type == "pie":
                traces.append(go.Pie(labels=x, values=y, name=y_col,
                                     marker=dict(colors=palette)).to_plotly_json())
                break
            else:  # line
                traces.append(go.Scatter(x=x, y=y, name=y_col, mode="lines+markers",
                                         line=dict(color=c, width=2),
                                         marker=dict(size=6)).to_plotly_json())

        # BUG FIX: _PLOTLY_LAYOUT already contains "xaxis"/"yaxis" keys, so the
        # previous dict(**_PLOTLY_LAYOUT, xaxis=..., yaxis=...) raised
        # TypeError ("got multiple values for keyword argument 'xaxis'").
        # Strip the axis entries from the base before overriding them.
        base_layout = {k: v for k, v in _PLOTLY_LAYOUT.items() if k not in ("xaxis", "yaxis")}
        layout = dict(
            **base_layout,
            title=dict(
                text=f"{chart_type.title()} \u2014 {', '.join(y_cols)} vs {x_col}",
                font=dict(size=15, color="#e2e8f0"),
            ),
            xaxis=dict(**_PLOTLY_LAYOUT["xaxis"], title=x_col),
            yaxis=dict(**_PLOTLY_LAYOUT["yaxis"], title=" / ".join(y_cols)),
        )

        return {"data": traces, "layout": layout}
|
data/readme
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
data/sample_data.csv
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Year,Sales,Employee expense,Net profit,EBITDA,EBIT,RoCE,interest,WC %
|
| 2 |
+
2020,1000,200,100,300,250,10,50,5
|
| 3 |
+
2021,1200,220,150,350,300,12,55,6
|
| 4 |
+
2022,1400,250,200,400,350,15,60,7
|
| 5 |
+
2023,1600,270,250,450,400,18,65,8
|
data/train_data.csv
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
query,arguments
|
| 2 |
+
plot the sales in the years with red line,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line', 'color': 'red'}"
|
| 3 |
+
show employee expenses and net profit over the years,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
|
| 4 |
+
display the EBITDA for each year with a blue bar,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar', 'color': 'blue'}"
|
| 5 |
+
plot the RoCE over time,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line'}"
|
| 6 |
+
show the interest payments each year with a green bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'green'}"
|
| 7 |
+
display the working capital percentage over the years,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
| 8 |
+
plot the EBIT for each year with an orange line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'orange'}"
|
| 9 |
+
show sales and EBIT over the years,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
|
| 10 |
+
display the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
|
| 11 |
+
plot the employee expenses each year with a red line,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line', 'color': 'red'}"
|
| 12 |
+
show the annual sales in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
|
| 13 |
+
display EBIT and EBITDA over the years,"{'x': 'Year', 'y': ['EBIT', 'EBITDA'], 'chart_type': 'line'}"
|
| 14 |
+
plot the RoCE for each year with a purple line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'purple'}"
|
| 15 |
+
show the interest and working capital percentage,"{'x': 'Year', 'y': ['interest', 'WC %'], 'chart_type': 'line'}"
|
| 16 |
+
display the annual net profit with a blue bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'blue'}"
|
| 17 |
+
plot the sales and employee expenses in a line chart,"{'x': 'Year', 'y': ['Sales', 'Employee expense'], 'chart_type': 'line'}"
|
| 18 |
+
show the EBITDA for each year with a green bar,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar', 'color': 'green'}"
|
| 19 |
+
display the EBIT over time with an orange line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'orange'}"
|
| 20 |
+
plot the net profit each year with a red bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'red'}"
|
| 21 |
+
show the employee expenses in a line chart,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
|
| 22 |
+
display the annual interest payments with a blue line,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line', 'color': 'blue'}"
|
| 23 |
+
plot the RoCE and WC % over the years,"{'x': 'Year', 'y': ['RoCE', 'WC %'], 'chart_type': 'line'}"
|
| 24 |
+
show the sales each year with an orange bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'orange'}"
|
| 25 |
+
display EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
|
| 26 |
+
plot the employee expenses and EBIT,"{'x': 'Year', 'y': ['Employee expense', 'EBIT'], 'chart_type': 'line'}"
|
| 27 |
+
show the working capital percentage each year,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
| 28 |
+
display the RoCE in a bar chart,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'bar'}"
|
| 29 |
+
plot the annual sales with a green line,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line', 'color': 'green'}"
|
| 30 |
+
show the EBIT and interest over time,"{'x': 'Year', 'y': ['EBIT', 'interest'], 'chart_type': 'line'}"
|
| 31 |
+
display the net profit each year with a purple bar,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar', 'color': 'purple'}"
|
| 32 |
+
plot the employee expenses over the years,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
|
| 33 |
+
show the EBITDA in a line chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
|
| 34 |
+
display EBIT and RoCE,"{'x': 'Year', 'y': ['EBIT', 'RoCE'], 'chart_type': 'line'}"
|
| 35 |
+
plot the sales and net profit each year,"{'x': 'Year', 'y': ['Sales', 'Net profit'], 'chart_type': 'line'}"
|
| 36 |
+
show the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
| 37 |
+
display the working capital percentage with a red bar,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'bar', 'color': 'red'}"
|
| 38 |
+
plot the RoCE for each year with a blue line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'blue'}"
|
| 39 |
+
show the sales over the years in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
|
| 40 |
+
display EBITDA and employee expenses,"{'x': 'Year', 'y': ['EBITDA', 'Employee expense'], 'chart_type': 'line'}"
|
| 41 |
+
plot the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
|
| 42 |
+
show the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
| 43 |
+
display the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
|
| 44 |
+
plot the EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
|
| 45 |
+
show the RoCE each year with an orange line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'orange'}"
|
| 46 |
+
display the sales and EBIT over time,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
|
| 47 |
+
plot the working capital percentage annually,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
| 48 |
+
show the interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
|
| 49 |
+
display the annual net profit,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
| 50 |
+
plot the employee expenses each year,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'line'}"
|
| 51 |
+
show the EBITDA in a bar chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'bar'}"
|
| 52 |
+
display the EBIT with a red line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'red'}"
|
| 53 |
+
plot the sales each year with an orange bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'orange'}"
|
| 54 |
+
show the employee expenses and net profit,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
|
| 55 |
+
display the RoCE and working capital percentage,"{'x': 'Year', 'y': ['RoCE', 'WC %'], 'chart_type': 'line'}"
|
| 56 |
+
plot the EBITDA and EBIT,"{'x': 'Year', 'y': ['EBITDA', 'EBIT'], 'chart_type': 'line'}"
|
| 57 |
+
show the sales in a line chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'line'}"
|
| 58 |
+
display the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
| 59 |
+
plot the annual interest payments,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
| 60 |
+
show the RoCE over time,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line'}"
|
| 61 |
+
display the working capital percentage,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
| 62 |
+
plot the EBITDA each year,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
|
| 63 |
+
show the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
|
| 64 |
+
display the EBIT and RoCE,"{'x': 'Year', 'y': ['EBIT', 'RoCE'], 'chart_type': 'line'}"
|
| 65 |
+
plot the sales and net profit each year,"{'x': 'Year', 'y': ['Sales', 'Net profit'], 'chart_type': 'line'}"
|
| 66 |
+
show the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
| 67 |
+
display the working capital percentage with a red bar,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'bar', 'color': 'red'}"
|
| 68 |
+
plot the RoCE for each year with a blue line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'blue'}"
|
| 69 |
+
show the sales over the years in a bar chart,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar'}"
|
| 70 |
+
display EBITDA and employee expenses,"{'x': 'Year', 'y': ['EBITDA', 'Employee expense'], 'chart_type': 'line'}"
|
| 71 |
+
plot the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
|
| 72 |
+
show the net profit each year,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
| 73 |
+
display the employee expenses with a green bar,"{'x': 'Year', 'y': ['Employee expense'], 'chart_type': 'bar', 'color': 'green'}"
|
| 74 |
+
plot the EBITDA and net profit,"{'x': 'Year', 'y': ['EBITDA', 'Net profit'], 'chart_type': 'line'}"
|
| 75 |
+
show the RoCE each year with an orange line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'orange'}"
|
| 76 |
+
display the sales and EBIT over time,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
|
| 77 |
+
plot the working capital percentage annually,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
| 78 |
+
show the interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
|
| 79 |
+
display the annual net profit,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'line'}"
|
| 80 |
+
plot the EBIT and sales over time,"{'x': 'Year', 'y': ['EBIT', 'Sales'], 'chart_type': 'line'}"
|
| 81 |
+
show the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
|
| 82 |
+
display the employee expenses and EBITDA over the years,"{'x': 'Year', 'y': ['Employee expense', 'EBITDA'], 'chart_type': 'line'}"
|
| 83 |
+
plot the RoCE for each year with a red line,"{'x': 'Year', 'y': ['RoCE'], 'chart_type': 'line', 'color': 'red'}"
|
| 84 |
+
show the interest payments each year,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
| 85 |
+
display the working capital percentage over time,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line'}"
|
| 86 |
+
plot the EBIT for each year with a blue line,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line', 'color': 'blue'}"
|
| 87 |
+
show sales and EBITDA over time,"{'x': 'Year', 'y': ['Sales', 'EBITDA'], 'chart_type': 'line'}"
|
| 88 |
+
display the net profit and RoCE,"{'x': 'Year', 'y': ['Net profit', 'RoCE'], 'chart_type': 'line'}"
|
| 89 |
+
plot the employee expenses and net profit each year,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
|
| 90 |
+
show the EBITDA in a line chart,"{'x': 'Year', 'y': ['EBITDA'], 'chart_type': 'line'}"
|
| 91 |
+
display the sales and EBIT over the years,"{'x': 'Year', 'y': ['Sales', 'EBIT'], 'chart_type': 'line'}"
|
| 92 |
+
plot the working capital percentage with a red line,"{'x': 'Year', 'y': ['WC %'], 'chart_type': 'line', 'color': 'red'}"
|
| 93 |
+
show the annual interest payments with a blue bar,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'bar', 'color': 'blue'}"
|
| 94 |
+
display the EBIT annually,"{'x': 'Year', 'y': ['EBIT'], 'chart_type': 'line'}"
|
| 95 |
+
plot the sales each year with a green bar,"{'x': 'Year', 'y': ['Sales'], 'chart_type': 'bar', 'color': 'green'}"
|
| 96 |
+
show the employee expenses and net profit over time,"{'x': 'Year', 'y': ['Employee expense', 'Net profit'], 'chart_type': 'line'}"
|
| 97 |
+
display the RoCE and EBITDA,"{'x': 'Year', 'y': ['RoCE', 'EBITDA'], 'chart_type': 'line'}"
|
| 98 |
+
plot the interest payments annually,"{'x': 'Year', 'y': ['interest'], 'chart_type': 'line'}"
|
| 99 |
+
show the EBIT and sales each year,"{'x': 'Year', 'y': ['EBIT', 'Sales'], 'chart_type': 'line'}"
|
| 100 |
+
display the net profit in a bar chart,"{'x': 'Year', 'y': ['Net profit'], 'chart_type': 'bar'}"
|
data_processor.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import os
|
| 3 |
+
import logging
|
| 4 |
+
|
| 5 |
+
class DataProcessor:
    """Loads a tabular file (CSV/XLS/XLSX) and exposes simple
    introspection helpers: columns, coarse dtypes, row preview,
    and numeric summary statistics."""

    def __init__(self, data_path=None):
        logging.info("Initializing DataProcessor")
        # Fall back to the bundled sample dataset when no (existing) path is given.
        default_path = os.path.join(os.path.dirname(__file__), 'data', 'sample_data.csv')
        self.data_path = data_path if data_path and os.path.exists(data_path) else default_path
        self.data = self.load_data(self.data_path)

    def load_data(self, path):
        """Read *path* into a DataFrame; return an empty frame on any failure."""
        ext = os.path.splitext(path)[1].lower()
        try:
            if ext == '.csv':
                frame = pd.read_csv(path)
            elif ext in ('.xls', '.xlsx'):
                frame = pd.read_excel(path)
            else:
                raise ValueError(f"Unsupported file type: {ext}")
            logging.info(f"Loaded data from {path} with shape {frame.shape}")
            return frame
        except Exception as e:
            # Best-effort loader: callers get an empty DataFrame rather than a crash.
            logging.error(f"Failed to load data: {e}")
            return pd.DataFrame()

    def validate_columns(self, required_columns):
        """Return (ok, missing_columns) for *required_columns* against the data."""
        missing = [name for name in required_columns if name not in self.data.columns]
        if missing:
            logging.warning(f"Missing columns: {missing}")
        return (not missing), missing

    def get_columns(self):
        """Column names, in order."""
        return list(self.data.columns)

    def preview(self, n=5):
        """First *n* rows as a list of record dicts."""
        return self.data.head(n).to_dict(orient='records')

    def get_dtypes(self) -> dict:
        """Map each column to a coarse type label:
        integer / float / datetime / boolean / string."""
        def label(dtype):
            if pd.api.types.is_integer_dtype(dtype):
                return "integer"
            if pd.api.types.is_float_dtype(dtype):
                return "float"
            if pd.api.types.is_datetime64_any_dtype(dtype):
                return "datetime"
            if pd.api.types.is_bool_dtype(dtype):
                return "boolean"
            return "string"
        return {col: label(dtype) for col, dtype in self.data.dtypes.items()}

    def get_stats(self) -> dict:
        """describe() over numeric columns, values rounded to 4 places;
        empty dict when there are no numeric columns."""
        numeric = self.data.select_dtypes(include='number')
        if numeric.empty:
            return {}
        return {
            col: {name: round(value, 4) for name, value in column_stats.items()}
            for col, column_stats in numeric.describe().to_dict().items()
        }
|
| 64 |
+
|
deploy_backend.ps1
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Deploy helper: strip everything except the backend/ tree from the git index,
# then commit. Intended to prepare a backend-only commit for the HF Space.
$ErrorActionPreference = 'Stop'
# All files currently tracked by git.
$files = git ls-files
foreach ($f in $files) {
    # Stage removal of anything outside backend/ (keep the backend dir itself).
    if ($f -notlike 'backend/*' -and $f -ne 'backend') {
        git rm -q --ignore-unmatch -- "$f"
    }
}
Write-Output "-- staged removals --"
git status --porcelain=2 --branch
# If nothing got staged, still create an (empty) marker commit so the deploy
# pipeline has a commit to push.
$staged = git diff --staged --name-only
if (-not $staged) {
    git commit --allow-empty -m 'Deploy: keep only backend (empty commit if no removals)'
} else {
    git commit -m 'Deploy: keep only backend'
}
Write-Output "-- commit done --"
|
fine-tuned-bart-large/README.md
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
library_name: transformers
|
| 4 |
+
language: en
|
| 5 |
+
datasets:
|
| 6 |
+
- your-dataset-name
|
| 7 |
+
metrics:
|
| 8 |
+
- rouge
|
| 9 |
+
base_model: facebook/bart-large
|
| 10 |
+
tags:
|
| 11 |
+
- text2text-generation
|
| 12 |
+
- summarization
|
| 13 |
+
- fine-tuned
|
| 14 |
+
pipeline_tag: summarization
|
| 15 |
+
model-index:
|
| 16 |
+
- name: fine-tuned-bart-large
|
| 17 |
+
results:
|
| 18 |
+
- task: summarization
|
| 19 |
+
dataset: your-dataset-name
|
| 20 |
+
metrics:
|
| 21 |
+
- rouge1: 0.45
|
| 22 |
+
- rouge2: 0.22
|
| 23 |
+
- rougel: 0.40
|
| 24 |
+
---
|
| 25 |
+
|
| 26 |
+
# Fine-tuned BART Large Model
|
| 27 |
+
|
| 28 |
+
This repository contains a fine-tuned BART large model for text summarization tasks.
|
| 29 |
+
|
| 30 |
+
## Model Details
|
| 31 |
+
|
| 32 |
+
- Base model: facebook/bart-large
|
| 33 |
+
- Fine-tuned on: your-dataset-name
|
| 34 |
+
- License: MIT
|
| 35 |
+
|
| 36 |
+
## Usage
|
| 37 |
+
|
| 38 |
+
You can load this model using the Hugging Face Transformers library:
|
| 39 |
+
|
| 40 |
+
```python
|
| 41 |
+
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
| 42 |
+
|
| 43 |
+
model_name = "ArchCoder/fine-tuned-bart-large"
|
| 44 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 45 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
If you host the model under a different account or name, replace `"ArchCoder/fine-tuned-bart-large"` with your own model repo name.
|
| 49 |
+
|
| 50 |
+
## Evaluation
|
| 51 |
+
|
| 52 |
+
The model was evaluated on the your-dataset-name dataset with the following metrics:
|
| 53 |
+
|
| 54 |
+
- ROUGE-1: 0.45
|
| 55 |
+
- ROUGE-2: 0.22
|
| 56 |
+
- ROUGE-L: 0.40
|
| 57 |
+
|
| 58 |
+
## License
|
| 59 |
+
|
| 60 |
+
This model is licensed under the MIT License.
|
fine-tuned-bart-large/config.json
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "facebook/bart-large",
|
| 3 |
+
"activation_dropout": 0.1,
|
| 4 |
+
"activation_function": "gelu",
|
| 5 |
+
"add_bias_logits": false,
|
| 6 |
+
"add_final_layer_norm": false,
|
| 7 |
+
"architectures": [
|
| 8 |
+
"BartForConditionalGeneration"
|
| 9 |
+
],
|
| 10 |
+
"attention_dropout": 0.1,
|
| 11 |
+
"bos_token_id": 0,
|
| 12 |
+
"classif_dropout": 0.1,
|
| 13 |
+
"classifier_dropout": 0.0,
|
| 14 |
+
"d_model": 1024,
|
| 15 |
+
"decoder_attention_heads": 16,
|
| 16 |
+
"decoder_ffn_dim": 4096,
|
| 17 |
+
"decoder_layerdrop": 0.0,
|
| 18 |
+
"decoder_layers": 12,
|
| 19 |
+
"decoder_start_token_id": 2,
|
| 20 |
+
"dropout": 0.1,
|
| 21 |
+
"early_stopping": true,
|
| 22 |
+
"encoder_attention_heads": 16,
|
| 23 |
+
"encoder_ffn_dim": 4096,
|
| 24 |
+
"encoder_layerdrop": 0.0,
|
| 25 |
+
"encoder_layers": 12,
|
| 26 |
+
"eos_token_id": 2,
|
| 27 |
+
"forced_bos_token_id": 0,
|
| 28 |
+
"forced_eos_token_id": 2,
|
| 29 |
+
"gradient_checkpointing": false,
|
| 30 |
+
"id2label": {
|
| 31 |
+
"0": "LABEL_0",
|
| 32 |
+
"1": "LABEL_1",
|
| 33 |
+
"2": "LABEL_2"
|
| 34 |
+
},
|
| 35 |
+
"init_std": 0.02,
|
| 36 |
+
"is_encoder_decoder": true,
|
| 37 |
+
"label2id": {
|
| 38 |
+
"LABEL_0": 0,
|
| 39 |
+
"LABEL_1": 1,
|
| 40 |
+
"LABEL_2": 2
|
| 41 |
+
},
|
| 42 |
+
"max_position_embeddings": 1024,
|
| 43 |
+
"model_type": "bart",
|
| 44 |
+
"no_repeat_ngram_size": 3,
|
| 45 |
+
"normalize_before": false,
|
| 46 |
+
"num_beams": 4,
|
| 47 |
+
"num_hidden_layers": 12,
|
| 48 |
+
"pad_token_id": 1,
|
| 49 |
+
"scale_embedding": false,
|
| 50 |
+
"task_specific_params": {
|
| 51 |
+
"summarization": {
|
| 52 |
+
"length_penalty": 1.0,
|
| 53 |
+
"max_length": 128,
|
| 54 |
+
"min_length": 12,
|
| 55 |
+
"num_beams": 4
|
| 56 |
+
},
|
| 57 |
+
"summarization_cnn": {
|
| 58 |
+
"length_penalty": 2.0,
|
| 59 |
+
"max_length": 142,
|
| 60 |
+
"min_length": 56,
|
| 61 |
+
"num_beams": 4
|
| 62 |
+
},
|
| 63 |
+
"summarization_xsum": {
|
| 64 |
+
"length_penalty": 1.0,
|
| 65 |
+
"max_length": 62,
|
| 66 |
+
"min_length": 11,
|
| 67 |
+
"num_beams": 6
|
| 68 |
+
}
|
| 69 |
+
},
|
| 70 |
+
"torch_dtype": "float32",
|
| 71 |
+
"transformers_version": "4.42.3",
|
| 72 |
+
"use_cache": true,
|
| 73 |
+
"vocab_size": 50265
|
| 74 |
+
}
|
fine-tuned-bart-large/generation_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 0,
|
| 4 |
+
"decoder_start_token_id": 2,
|
| 5 |
+
"early_stopping": true,
|
| 6 |
+
"eos_token_id": 2,
|
| 7 |
+
"forced_bos_token_id": 0,
|
| 8 |
+
"forced_eos_token_id": 2,
|
| 9 |
+
"no_repeat_ngram_size": 3,
|
| 10 |
+
"num_beams": 4,
|
| 11 |
+
"pad_token_id": 1,
|
| 12 |
+
"transformers_version": "4.42.3"
|
| 13 |
+
}
|
fine-tuned-bart-large/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fine-tuned-bart-large/special_tokens_map.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<s>",
|
| 3 |
+
"cls_token": "<s>",
|
| 4 |
+
"eos_token": "</s>",
|
| 5 |
+
"mask_token": {
|
| 6 |
+
"content": "<mask>",
|
| 7 |
+
"lstrip": true,
|
| 8 |
+
"normalized": true,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false
|
| 11 |
+
},
|
| 12 |
+
"pad_token": "<pad>",
|
| 13 |
+
"sep_token": "</s>",
|
| 14 |
+
"unk_token": "<unk>"
|
| 15 |
+
}
|
fine-tuned-bart-large/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
fine-tuned-bart-large/tokenizer_config.json
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"0": {
|
| 5 |
+
"content": "<s>",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": true,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
},
|
| 12 |
+
"1": {
|
| 13 |
+
"content": "<pad>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": true,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false,
|
| 18 |
+
"special": true
|
| 19 |
+
},
|
| 20 |
+
"2": {
|
| 21 |
+
"content": "</s>",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": true,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false,
|
| 26 |
+
"special": true
|
| 27 |
+
},
|
| 28 |
+
"3": {
|
| 29 |
+
"content": "<unk>",
|
| 30 |
+
"lstrip": false,
|
| 31 |
+
"normalized": true,
|
| 32 |
+
"rstrip": false,
|
| 33 |
+
"single_word": false,
|
| 34 |
+
"special": true
|
| 35 |
+
},
|
| 36 |
+
"50264": {
|
| 37 |
+
"content": "<mask>",
|
| 38 |
+
"lstrip": true,
|
| 39 |
+
"normalized": true,
|
| 40 |
+
"rstrip": false,
|
| 41 |
+
"single_word": false,
|
| 42 |
+
"special": true
|
| 43 |
+
}
|
| 44 |
+
},
|
| 45 |
+
"bos_token": "<s>",
|
| 46 |
+
"clean_up_tokenization_spaces": true,
|
| 47 |
+
"cls_token": "<s>",
|
| 48 |
+
"eos_token": "</s>",
|
| 49 |
+
"errors": "replace",
|
| 50 |
+
"mask_token": "<mask>",
|
| 51 |
+
"model_max_length": 1024,
|
| 52 |
+
"pad_token": "<pad>",
|
| 53 |
+
"sep_token": "</s>",
|
| 54 |
+
"tokenizer_class": "BartTokenizer",
|
| 55 |
+
"trim_offsets": true,
|
| 56 |
+
"unk_token": "<unk>"
|
| 57 |
+
}
|
fine-tuned-bart-large/training_args.bin
ADDED
|
Binary file (5.24 kB). View file
|
|
|
fine-tuned-bart-large/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
image_verifier.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from PIL import Image
|
| 2 |
+
import torch
|
| 3 |
+
from transformers import CLIPProcessor, CLIPModel
|
| 4 |
+
import os
|
| 5 |
+
import logging
|
| 6 |
+
import time
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
|
| 9 |
+
load_dotenv()
|
| 10 |
+
|
| 11 |
+
class ImageVerifier:
    """Check that a generated chart image matches the user's query using CLIP."""

    # Neutral caption the query must out-score for verification to pass.
    _BASELINE_PROMPT = "an unrelated image"

    def __init__(self):
        # Loads openai/clip-vit-base-patch32 eagerly (network/disk access).
        logging.info("Initializing ImageVerifier")
        self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    def verify(self, image_path, query):
        """Return True if the image at *image_path* matches *query*.

        *image_path* is resolved relative to the parent of this file's
        directory (absolute paths pass through os.path.join unchanged).
        """
        start_time = time.time()
        logging.info(f"Verifying image {image_path} with query: {query}")

        full_image_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), image_path)

        image = Image.open(full_image_path)

        # BUG FIX: the original scored only [query]; softmax over a single
        # candidate is always 1.0 and argmax() is always 0, so verify()
        # unconditionally returned True. Score the query against a neutral
        # baseline caption so the comparison is meaningful.
        texts = [query, self._BASELINE_PROMPT]
        inputs = self.processor(text=texts, images=image, return_tensors="pt", padding=True)
        with torch.no_grad():  # inference only; skip autograd bookkeeping
            outputs = self.model(**inputs)
        logits_per_image = outputs.logits_per_image
        probs = logits_per_image.softmax(dim=1)

        # Index 0 is the user query; it must beat the baseline caption.
        verification_result = probs.argmax().item() == 0
        end_time = time.time()

        logging.info(f"Image verification result: {verification_result} in {end_time - start_time} seconds")
        return verification_result
|
llm_agent.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ast
|
| 2 |
+
import json
|
| 3 |
+
import logging
|
| 4 |
+
import os
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
|
| 9 |
+
from chart_generator import ChartGenerator
|
| 10 |
+
from data_processor import DataProcessor
|
| 11 |
+
|
| 12 |
+
load_dotenv()
|
| 13 |
+
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
# ---------------------------------------------------------------------------
|
| 17 |
+
# Prompt templates
|
| 18 |
+
# ---------------------------------------------------------------------------
|
| 19 |
+
|
| 20 |
+
_SYSTEM_PROMPT = (
|
| 21 |
+
"You are a data visualization expert. "
|
| 22 |
+
"Given the user request and the dataset schema provided, output ONLY a valid JSON "
|
| 23 |
+
"object — no explanation, no markdown fences, no extra text.\n\n"
|
| 24 |
+
"Required keys:\n"
|
| 25 |
+
' "x" : string — exact column name for the x-axis\n'
|
| 26 |
+
' "y" : array — one or more exact column names for the y-axis\n'
|
| 27 |
+
' "chart_type" : string — one of: line, bar, scatter, pie, histogram, box, area\n'
|
| 28 |
+
' "color" : string — optional CSS color, e.g. "red", "#4f8cff"\n\n'
|
| 29 |
+
"Rules:\n"
|
| 30 |
+
"- Use only column names that appear in the schema. Never invent names.\n"
|
| 31 |
+
"- For pie: y must contain exactly one column.\n"
|
| 32 |
+
"- For histogram/box: x may equal the first element of y.\n"
|
| 33 |
+
"- Default to line if chart type is ambiguous."
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _user_message(query: str, columns: list, dtypes: dict, sample_rows: list) -> str:
|
| 38 |
+
schema = "\n".join(f" - {c} ({dtypes.get(c, 'unknown')})" for c in columns)
|
| 39 |
+
samples = "".join(f" {json.dumps(r)}\n" for r in sample_rows[:3])
|
| 40 |
+
return (
|
| 41 |
+
f"Dataset columns:\n{schema}\n\n"
|
| 42 |
+
f"Sample rows (first 3):\n{samples}\n"
|
| 43 |
+
f"User request: {query}"
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# ---------------------------------------------------------------------------
|
| 48 |
+
# Output parsing & validation
|
| 49 |
+
# ---------------------------------------------------------------------------
|
| 50 |
+
|
| 51 |
+
def _parse_output(text: str):
|
| 52 |
+
text = text.strip()
|
| 53 |
+
if "```" in text:
|
| 54 |
+
for part in text.split("```"):
|
| 55 |
+
part = part.strip().lstrip("json").strip()
|
| 56 |
+
if part.startswith("{"):
|
| 57 |
+
text = part
|
| 58 |
+
break
|
| 59 |
+
try:
|
| 60 |
+
return json.loads(text)
|
| 61 |
+
except json.JSONDecodeError:
|
| 62 |
+
pass
|
| 63 |
+
try:
|
| 64 |
+
return ast.literal_eval(text)
|
| 65 |
+
except (SyntaxError, ValueError):
|
| 66 |
+
pass
|
| 67 |
+
return None
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def _validate(args: dict, columns: list):
|
| 71 |
+
if not isinstance(args, dict):
|
| 72 |
+
return None
|
| 73 |
+
if not all(k in args for k in ("x", "y", "chart_type")):
|
| 74 |
+
return None
|
| 75 |
+
if isinstance(args["y"], str):
|
| 76 |
+
args["y"] = [args["y"]]
|
| 77 |
+
valid = {"line", "bar", "scatter", "pie", "histogram", "box", "area"}
|
| 78 |
+
if args["chart_type"] not in valid:
|
| 79 |
+
args["chart_type"] = "line"
|
| 80 |
+
if args["x"] not in columns:
|
| 81 |
+
return None
|
| 82 |
+
if not all(c in columns for c in args["y"]):
|
| 83 |
+
return None
|
| 84 |
+
return args
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# ---------------------------------------------------------------------------
|
| 88 |
+
# Agent
|
| 89 |
+
# ---------------------------------------------------------------------------
|
| 90 |
+
|
| 91 |
+
class LLM_Agent:
    """Turns a natural-language chart request into a rendered chart.

    One agent wraps a DataProcessor (dataset access) and a ChartGenerator
    (rendering), plus lazily-loaded local models (Qwen, fine-tuned BART) and
    remote APIs (Gemini, Grok) that translate the request into plot args.
    """

    def __init__(self, data_path=None):
        logger.info("Initializing LLM_Agent")
        # Default dataset; process_request may swap these per request.
        self.data_processor = DataProcessor(data_path)
        self.chart_generator = ChartGenerator(self.data_processor.data)
        # Local models are loaded lazily on first use to keep startup fast.
        self._bart_tokenizer = None
        self._bart_model = None
        self._qwen_tokenizer = None
        self._qwen_model = None

    # -- model runners -------------------------------------------------------

    def _run_qwen(self, user_msg: str) -> str:
        """Run the local Qwen chat model on the prompt; returns raw text."""
        if self._qwen_model is None:
            # Lazy import + load: the first Qwen request pays the load cost.
            from transformers import AutoModelForCausalLM, AutoTokenizer
            model_id = "Qwen/Qwen2.5-1.5B-Instruct"
            logger.info("Loading Qwen model (first request)...")
            self._qwen_tokenizer = AutoTokenizer.from_pretrained(model_id)
            self._qwen_model = AutoModelForCausalLM.from_pretrained(model_id)
            logger.info("Qwen model loaded.")
        messages = [
            {"role": "system", "content": _SYSTEM_PROMPT},
            {"role": "user", "content": user_msg},
        ]
        text = self._qwen_tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        inputs = self._qwen_tokenizer(text, return_tensors="pt")
        outputs = self._qwen_model.generate(
            **inputs, max_new_tokens=256, temperature=0.1, do_sample=True
        )
        # Decode only the newly generated tokens (slice off the prompt).
        return self._qwen_tokenizer.decode(
            outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True
        )

    def _run_gemini(self, user_msg: str) -> str:
        """Call the Gemini API; raises ValueError if GEMINI_API_KEY is unset."""
        import google.generativeai as genai
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            raise ValueError("GEMINI_API_KEY is not set")
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(
            "gemini-2.0-flash",
            system_instruction=_SYSTEM_PROMPT,
        )
        return model.generate_content(user_msg).text

    def _run_grok(self, user_msg: str) -> str:
        """Call xAI's Grok via the OpenAI-compatible endpoint.

        Raises ValueError if GROK_API_KEY is unset.
        """
        from openai import OpenAI
        api_key = os.getenv("GROK_API_KEY")
        if not api_key:
            raise ValueError("GROK_API_KEY is not set")
        client = OpenAI(api_key=api_key, base_url="https://api.x.ai/v1")
        resp = client.chat.completions.create(
            model="grok-3-mini",
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": user_msg},
            ],
            max_tokens=256,
            temperature=0.1,
        )
        return resp.choices[0].message.content

    def _run_bart(self, query: str) -> str:
        """Run the fine-tuned BART seq2seq model on the raw query.

        Unlike the chat runners, BART receives only the query text (it was
        fine-tuned on query -> plot-args pairs, not on the schema prompt).
        """
        if self._bart_model is None:
            from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
            model_id = "ArchCoder/fine-tuned-bart-large"
            logger.info("Loading BART model (first request)...")
            self._bart_tokenizer = AutoTokenizer.from_pretrained(model_id)
            self._bart_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
            logger.info("BART model loaded.")
        inputs = self._bart_tokenizer(
            query, return_tensors="pt", max_length=512, truncation=True
        )
        outputs = self._bart_model.generate(**inputs, max_length=100)
        return self._bart_tokenizer.decode(outputs[0], skip_special_tokens=True)

    # -- main entry point ----------------------------------------------------

    def process_request(self, data: dict) -> dict:
        """Handle one chart request end-to-end.

        *data* keys: "query" (text request), optional "file_path" (uploaded
        dataset), optional "model" ("qwen" | "gemini" | "grok" | "bart").
        Returns a dict with response/chart_path/chart_spec/verified/plot_args;
        any LLM failure falls back to default plot args rather than erroring.
        """
        t0 = time.time()
        query = data.get("query", "")
        data_path = data.get("file_path")
        model = data.get("model", "qwen")

        # Swap in the uploaded dataset for this (and subsequent) requests.
        if data_path and os.path.exists(data_path):
            self.data_processor = DataProcessor(data_path)
            self.chart_generator = ChartGenerator(self.data_processor.data)

        columns = self.data_processor.get_columns()
        dtypes = self.data_processor.get_dtypes()
        sample_rows = self.data_processor.preview(3)

        # Fallback used when the LLM output is unusable: first column on x,
        # second on y (or generic names for an empty dataset).
        default_args = {
            "x": columns[0] if columns else "Year",
            "y": [columns[1]] if len(columns) > 1 else ["Sales"],
            "chart_type": "line",
        }

        raw_text = ""
        plot_args = None
        try:
            user_msg = _user_message(query, columns, dtypes, sample_rows)
            if model == "gemini": raw_text = self._run_gemini(user_msg)
            elif model == "grok": raw_text = self._run_grok(user_msg)
            elif model == "bart": raw_text = self._run_bart(query)
            else: raw_text = self._run_qwen(user_msg)

            logger.info(f"LLM [{model}] output: {raw_text}")
            parsed = _parse_output(raw_text)
            plot_args = _validate(parsed, columns) if parsed else None
        except Exception as exc:
            # Any backend failure (missing key, network, model load) is
            # logged and absorbed; the default args keep the request alive.
            logger.error(f"LLM error [{model}]: {exc}")
            raw_text = str(exc)

        if not plot_args:
            logger.warning("Falling back to default plot args")
            plot_args = default_args

        try:
            chart_result = self.chart_generator.generate_chart(plot_args)
            chart_path = chart_result["chart_path"]
            chart_spec = chart_result["chart_spec"]
        except Exception as exc:
            # Chart failure is terminal for this request — report it.
            logger.error(f"Chart generation error: {exc}")
            return {
                "response": f"Chart generation failed: {exc}",
                "chart_path": "",
                "chart_spec": None,
                "verified": False,
                "plot_args": plot_args,
            }

        logger.info(f"Request processed in {time.time() - t0:.2f}s")
        return {
            "response": json.dumps(plot_args),
            "chart_path": chart_path,
            "chart_spec": chart_spec,
            "verified": True,
            "plot_args": plot_args,
        }
|
requirements.txt
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate
|
| 2 |
+
aiohttp
|
| 3 |
+
aiosignal
|
| 4 |
+
attrs
|
| 5 |
+
blinker
|
| 6 |
+
certifi
|
| 7 |
+
charset-normalizer
|
| 8 |
+
click
|
| 9 |
+
colorama
|
| 10 |
+
coloredlogs
|
| 11 |
+
contourpy
|
| 12 |
+
cycler
|
| 13 |
+
datasets
|
| 14 |
+
dill
|
| 15 |
+
et-xmlfile
|
| 16 |
+
filelock
|
| 17 |
+
Flask
|
| 18 |
+
Flask-Cors
|
| 19 |
+
fonttools
|
| 20 |
+
frozenlist
|
| 21 |
+
fsspec
|
| 22 |
+
google-generativeai>=0.8.0
|
| 23 |
+
huggingface-hub>=0.23.0
|
| 24 |
+
humanfriendly
|
| 25 |
+
idna
|
| 26 |
+
intel-openmp
|
| 27 |
+
itsdangerous
|
| 28 |
+
Jinja2
|
| 29 |
+
joblib
|
| 30 |
+
kiwisolver
|
| 31 |
+
MarkupSafe
|
| 32 |
+
matplotlib
|
| 33 |
+
mkl
|
| 34 |
+
mpmath
|
| 35 |
+
multidict
|
| 36 |
+
multiprocess
|
| 37 |
+
networkx
|
| 38 |
+
numpy
|
| 39 |
+
openai>=1.0.0
|
| 40 |
+
openpyxl
|
| 41 |
+
optimum
|
| 42 |
+
packaging
|
| 43 |
+
pandas
|
| 44 |
+
pillow
|
| 45 |
+
plotly>=5.18.0
|
| 46 |
+
protobuf
|
| 47 |
+
psutil
|
| 48 |
+
pyarrow
|
| 49 |
+
pyarrow-hotfix
|
| 50 |
+
pyparsing
|
| 51 |
+
pyreadline3
|
| 52 |
+
python-dateutil
|
| 53 |
+
python-dotenv
|
| 54 |
+
pytz
|
| 55 |
+
PyYAML
|
| 56 |
+
regex
|
| 57 |
+
requests
|
| 58 |
+
safetensors
|
| 59 |
+
scikit-learn
|
| 60 |
+
scipy
|
| 61 |
+
sentencepiece
|
| 62 |
+
six
|
| 63 |
+
sympy
|
| 64 |
+
tbb
|
| 65 |
+
threadpoolctl
|
| 66 |
+
tokenizers
|
| 67 |
+
torch
|
| 68 |
+
torchvision
|
| 69 |
+
tqdm
|
| 70 |
+
transformers>=4.36.0
|
| 71 |
+
typing_extensions
|
| 72 |
+
tzdata
|
| 73 |
+
urllib3
|
| 74 |
+
Werkzeug
|
| 75 |
+
xxhash
|
| 76 |
+
yarl
|
start.sh
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Start script for backend Flask app on Hugging Face Spaces

export FLASK_APP=app.py
export FLASK_ENV=production

# Run the Flask app on 0.0.0.0:7860
# NOTE(review): the Dockerfile in this repo copies everything to /app and runs
# `python app.py`, while this script runs `backend/app.py` — that path only
# exists when launched from the parent repo layout. Confirm which entry point
# the Space actually uses; one of the two is likely stale.
python backend/app.py
|
tracked_files.txt
ADDED
|
Binary file (10 Bytes). View file
|
|
|
train_model.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Seq2SeqTrainer, Seq2SeqTrainingArguments
from sklearn.model_selection import train_test_split

# Fine-tune facebook/bart-large to translate natural-language chart requests
# ("query") into plot-argument strings ("arguments") from data/train_data.csv.
data = pd.read_csv('data/train_data.csv')
queries = data['query'].tolist()
arguments = data['arguments'].tolist()

# 80/20 train/eval split with a fixed seed for reproducibility.
train_queries, eval_queries, train_arguments, eval_arguments = train_test_split(queries, arguments, test_size=0.2, random_state=42)

tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large")

train_encodings = tokenizer(train_queries, truncation=True, padding=True)
eval_encodings = tokenizer(eval_queries, truncation=True, padding=True)

# NOTE(review): as_target_tokenizer() is deprecated in recent transformers;
# tokenizer(..., text_target=...) is the modern equivalent — confirm against
# the pinned transformers version before changing.
with tokenizer.as_target_tokenizer():
    train_labels = tokenizer(train_arguments, truncation=True, padding=True)
    eval_labels = tokenizer(eval_arguments, truncation=True, padding=True)

class PlotDataset(torch.utils.data.Dataset):
    """Pairs tokenized queries (model inputs) with tokenized arguments (labels)."""
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        # Tensorize one example; 'labels' carries the target token ids.
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels['input_ids'][idx])
        return item

    def __len__(self):
        return len(self.encodings.input_ids)

train_dataset = PlotDataset(train_encodings, train_labels)
eval_dataset = PlotDataset(eval_encodings, eval_labels)

# Small batch size / 3 epochs — sized for the ~100-row training CSV.
training_args = Seq2SeqTrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=3,
    logging_dir='./logs',
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    evaluation_strategy="epoch",  # NOTE(review): renamed to eval_strategy in newer transformers — verify pin
    predict_with_generate=True,
    generation_max_length=100,
)
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)

trainer.train()

# Persist model + tokenizer to the directory the backend loads from.
trainer.save_model("fine-tuned-bart-large")
tokenizer.save_pretrained("fine-tuned-bart-large")

print("Model and tokenizer fine-tuned and saved as 'fine-tuned-bart-large'")
|