Spaces:

ArchCoder
/

llm-excel-plotter-agent

Sleeping

App Files Community

Priyansh Saxena committed on Mar 7

Commit

f3fd40f

1 Parent(s): 962831e

fix: remove runtime model dependency and repair chart generation

Browse files

Files changed (3) hide show

app.py +2 -2
chart_generator.py +8 -10
llm_agent.py +107 -5

app.py CHANGED Viewed

@@ -17,7 +17,7 @@ logging.getLogger('plotly').setLevel(logging.WARNING)
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-app = Flask(__name__, static_folder=os.path.join(BASE_DIR, '..', 'static'))
 CORS(app, origins=[
     "https://llm-integrated-excel-plotter-app.vercel.app",
@@ -27,7 +27,7 @@ CORS(app, origins=[
 agent = LLM_Agent()
-UPLOAD_FOLDER     = os.path.join(BASE_DIR, '..', 'data', 'uploads')
 ALLOWED_EXTENSIONS = {'csv', 'xls', 'xlsx'}
 MAX_UPLOAD_BYTES   = 10 * 1024 * 1024  # 10 MB

 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+app = Flask(__name__, static_folder=os.path.join(BASE_DIR, 'static'))
 CORS(app, origins=[
     "https://llm-integrated-excel-plotter-app.vercel.app",
 agent = LLM_Agent()
+UPLOAD_FOLDER     = os.path.join(BASE_DIR, 'data', 'uploads')
 ALLOWED_EXTENSIONS = {'csv', 'xls', 'xlsx'}
 MAX_UPLOAD_BYTES   = 10 * 1024 * 1024  # 10 MB

chart_generator.py CHANGED Viewed

@@ -130,7 +130,7 @@ class ChartGenerator:
         if chart_type not in ("pie", "histogram", "box") and len(x) > 5:
             plt.xticks(rotation=45, ha="right")
-        output_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "static", "images")
         os.makedirs(output_dir, exist_ok=True)
         filename  = f"chart_{uuid.uuid4().hex[:12]}.png"
         full_path = os.path.join(output_dir, filename)
@@ -174,14 +174,12 @@ class ChartGenerator:
                                           line=dict(color=c, width=2),
                                           marker=dict(size=6)).to_plotly_json())
-        layout = dict(
-            **_PLOTLY_LAYOUT,
-            title=dict(
-                text=f"{chart_type.title()} \u2014 {', '.join(y_cols)} vs {x_col}",
-                font=dict(size=15, color="#e2e8f0"),
-            ),
-            xaxis=dict(**_PLOTLY_LAYOUT["xaxis"], title=x_col),
-            yaxis=dict(**_PLOTLY_LAYOUT["yaxis"], title=" / ".join(y_cols)),
-        )
         return {"data": traces, "layout": layout}

         if chart_type not in ("pie", "histogram", "box") and len(x) > 5:
             plt.xticks(rotation=45, ha="right")
+        output_dir = os.path.join(os.path.dirname(__file__), "static", "images")
         os.makedirs(output_dir, exist_ok=True)
         filename  = f"chart_{uuid.uuid4().hex[:12]}.png"
         full_path = os.path.join(output_dir, filename)
                                           line=dict(color=c, width=2),
                                           marker=dict(size=6)).to_plotly_json())
+        layout = {**_PLOTLY_LAYOUT}
+        layout["title"] = {
+            "text": f"{chart_type.title()} \u2014 {', '.join(y_cols)} vs {x_col}",
+            "font": {"size": 15, "color": "#e2e8f0"},
+        }
+        layout["xaxis"] = {**_PLOTLY_LAYOUT["xaxis"], "title": x_col}
+        layout["yaxis"] = {**_PLOTLY_LAYOUT["yaxis"], "title": " / ".join(y_cols)}
         return {"data": traces, "layout": layout}

llm_agent.py CHANGED Viewed

@@ -1,7 +1,9 @@
 import ast
 import json
 import logging
 import os
 import time
 from dotenv import load_dotenv
@@ -17,6 +19,18 @@ logger = logging.getLogger(__name__)
 def _model_dir(dirname: str) -> str:
     return os.path.join(os.path.dirname(os.path.abspath(__file__)), dirname)
 # ---------------------------------------------------------------------------
 # Prompt templates
 # ---------------------------------------------------------------------------
@@ -88,6 +102,89 @@ def _validate(args: dict, columns: list):
     return args
 # ---------------------------------------------------------------------------
 # Agent
 # ---------------------------------------------------------------------------
@@ -101,16 +198,19 @@ class LLM_Agent:
         self._bart_model = None
         self._qwen_tokenizer = None
         self._qwen_model = None
     # -- model runners -------------------------------------------------------
     def _run_qwen(self, user_msg: str) -> str:
         if self._qwen_model is None:
             from transformers import AutoModelForCausalLM, AutoTokenizer
-            # Prefer a local model path in Spaces to avoid any runtime network dependency.
-            model_id = os.getenv("QWEN_LOCAL_PATH", "")
             if not model_id:
                 raise ValueError("Qwen local model is not configured in this Space")
             logger.info("Loading Qwen model (first request)...")
             self._qwen_tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
             self._qwen_model = AutoModelForCausalLM.from_pretrained(model_id, local_files_only=True)
@@ -162,7 +262,9 @@ class LLM_Agent:
     def _run_bart(self, query: str) -> str:
         if self._bart_model is None:
             from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
-            model_id = os.getenv("BART_LOCAL_PATH", _model_dir("fine-tuned-bart-large"))
             logger.info("Loading BART model (first request)...")
             self._bart_tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
             self._bart_model = AutoModelForSeq2SeqLM.from_pretrained(model_id, local_files_only=True)
@@ -218,8 +320,8 @@ class LLM_Agent:
             raw_text = str(exc)
         if not plot_args:
-            logger.warning("Falling back to default plot args")
-            plot_args = default_args
         try:
             chart_result = self.chart_generator.generate_chart(plot_args)

 import ast
+import difflib
 import json
 import logging
 import os
+import re
 import time
 from dotenv import load_dotenv
 def _model_dir(dirname: str) -> str:
     return os.path.join(os.path.dirname(os.path.abspath(__file__)), dirname)
+def _has_model_weights(model_dir: str) -> bool:
+    weight_files = (
+        "pytorch_model.bin",
+        "model.safetensors",
+        "tf_model.h5",
+        "flax_model.msgpack",
+    )
+    return os.path.isdir(model_dir) and any(
+        os.path.exists(os.path.join(model_dir, filename)) for filename in weight_files
+    )
 # ---------------------------------------------------------------------------
 # Prompt templates
 # ---------------------------------------------------------------------------
     return args
+def _pick_chart_type(query: str) -> str:
+    lowered = query.lower()
+    aliases = {
+        "scatter": ["scatter", "scatterplot"],
+        "bar": ["bar", "column"],
+        "pie": ["pie", "donut"],
+        "histogram": ["histogram", "distribution"],
+        "box": ["box", "boxplot"],
+        "area": ["area"],
+        "line": ["line", "trend", "over time", "over the years"],
+    }
+    for chart_type, keywords in aliases.items():
+        if any(keyword in lowered for keyword in keywords):
+            return chart_type
+    return "line"
+def _pick_color(query: str):
+    lowered = query.lower()
+    colors = [
+        "red", "blue", "green", "yellow", "orange", "purple", "pink",
+        "black", "white", "gray", "grey", "cyan", "teal", "indigo",
+    ]
+    for color in colors:
+        if re.search(rf"\b{re.escape(color)}\b", lowered):
+            return color
+    return None
+def _pick_columns(query: str, columns: list, dtypes: dict):
+    lowered = query.lower()
+    query_tokens = re.findall(r"[a-zA-Z0-9_]+", lowered)
+    def score_column(column: str) -> float:
+        col_lower = column.lower()
+        score = 0.0
+        if col_lower in lowered:
+            score += 10.0
+        for token in query_tokens:
+            if token and token in col_lower:
+                score += 2.0
+        score += difflib.SequenceMatcher(None, lowered, col_lower).ratio()
+        return score
+    sorted_columns = sorted(columns, key=score_column, reverse=True)
+    numeric_columns = [col for col in columns if dtypes.get(col) in {"integer", "float"}]
+    temporal_columns = [col for col in columns if dtypes.get(col) == "datetime"]
+    year_like = [col for col in columns if "year" in col.lower() or "date" in col.lower() or "month" in col.lower()]
+    x_col = None
+    for candidate in year_like + temporal_columns + sorted_columns:
+        if candidate in columns:
+            x_col = candidate
+            break
+    if x_col is None and columns:
+        x_col = columns[0]
+    y_candidates = [col for col in sorted_columns if col != x_col and col in numeric_columns]
+    if not y_candidates:
+        y_candidates = [col for col in numeric_columns if col != x_col]
+    if not y_candidates:
+        y_candidates = [col for col in columns if col != x_col]
+    return x_col, y_candidates[:1]
+def _heuristic_plot_args(query: str, columns: list, dtypes: dict) -> dict:
+    x_col, y_cols = _pick_columns(query, columns, dtypes)
+    if not x_col:
+        x_col = "Year"
+    if not y_cols:
+        fallback_y = next((col for col in columns if col != x_col), columns[:1])
+        y_cols = list(fallback_y) if isinstance(fallback_y, tuple) else fallback_y
+        if isinstance(y_cols, str):
+            y_cols = [y_cols]
+    return {
+        "x": x_col,
+        "y": y_cols,
+        "chart_type": _pick_chart_type(query),
+        "color": _pick_color(query),
+    }
 # ---------------------------------------------------------------------------
 # Agent
 # ---------------------------------------------------------------------------
         self._bart_model = None
         self._qwen_tokenizer = None
         self._qwen_model = None
+        self._bart_model_dir = os.getenv("BART_LOCAL_PATH", _model_dir("fine-tuned-bart-large"))
+        self._qwen_model_dir = os.getenv("QWEN_LOCAL_PATH", "")
     # -- model runners -------------------------------------------------------
     def _run_qwen(self, user_msg: str) -> str:
         if self._qwen_model is None:
             from transformers import AutoModelForCausalLM, AutoTokenizer
+            model_id = self._qwen_model_dir
             if not model_id:
                 raise ValueError("Qwen local model is not configured in this Space")
+            if not _has_model_weights(model_id):
+                raise ValueError(f"Qwen model weights not found in {model_id}")
             logger.info("Loading Qwen model (first request)...")
             self._qwen_tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
             self._qwen_model = AutoModelForCausalLM.from_pretrained(model_id, local_files_only=True)
     def _run_bart(self, query: str) -> str:
         if self._bart_model is None:
             from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+            model_id = self._bart_model_dir
+            if not _has_model_weights(model_id):
+                raise ValueError(f"BART model weights not found in {model_id}")
             logger.info("Loading BART model (first request)...")
             self._bart_tokenizer = AutoTokenizer.from_pretrained(model_id, local_files_only=True)
             self._bart_model = AutoModelForSeq2SeqLM.from_pretrained(model_id, local_files_only=True)
             raw_text = str(exc)
         if not plot_args:
+            logger.warning("Falling back to heuristic plot args")
+            plot_args = _validate(_heuristic_plot_args(query, columns, dtypes), columns) or default_args
         try:
             chart_result = self.chart_generator.generate_chart(plot_args)