# === Standard Library ===
import os
import re
import json
import base64
import mimetypes
from html import escape
from pathlib import Path

# === Third-Party ===
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from dotenv import load_dotenv
from openai import OpenAI
from anthropic import Anthropic

# === Env & Clients ===
load_dotenv()

# Strip whitespace/newlines from API keys (common issue with env vars)
openai_api_key = os.getenv("OPENAI_API_KEY", "").strip()
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()

# Both clients read keys from env by default; explicit is also fine:
openai_client = OpenAI(api_key=openai_api_key) if openai_api_key else OpenAI()
anthropic_client = Anthropic(api_key=anthropic_api_key) if anthropic_api_key else Anthropic()


def get_response(model: str, prompt: str) -> str:
    """Get a text response from an LLM (OpenAI or Anthropic)."""
    if "claude" in model.lower() or "anthropic" in model.lower():
        # Check if the Anthropic API key is available
        if not anthropic_api_key:
            raise ValueError("ANTHROPIC_API_KEY not set. Please add it as a secret in HuggingFace Spaces settings.")
        # Anthropic Claude format
        message = anthropic_client.messages.create(
            model=model,
            max_tokens=1000,
            messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
        )
        return message.content[0].text
    else:
        # Check if the OpenAI API key is available
        if not openai_api_key:
            raise ValueError("OPENAI_API_KEY not set. Please add it as a secret in HuggingFace Spaces settings.")
        # Default to the OpenAI Responses API for all other models (gpt-4, o3-mini, o1, etc.)
        response = openai_client.responses.create(
            model=model,
            input=prompt,
        )
        return response.output_text


# === Data Loading ===
def load_and_prepare_data(csv_path: str) -> pd.DataFrame:
    """Load a CSV and derive the date parts commonly used in charts."""
    df = pd.read_csv(csv_path)
    # Be tolerant: only derive date parts if a 'date' column exists
    if "date" in df.columns:
        df["date"] = pd.to_datetime(df["date"], errors="coerce")
        df["quarter"] = df["date"].dt.quarter
        df["month"] = df["date"].dt.month
        df["year"] = df["date"].dt.year
    return df


# === Helpers ===
def make_schema_text(df: pd.DataFrame) -> str:
    """Return a human-readable schema from a DataFrame."""
    return "\n".join(f"- {c}: {dt}" for c, dt in df.dtypes.items())


def ensure_execute_python_tags(text: str) -> str:
    """Normalize code to be wrapped in <execute_python>...</execute_python> tags."""
    text = text.strip()
    # Strip ```python fences if present
    text = re.sub(r"^```(?:python)?\s*|\s*```$", "", text).strip()
    if "<execute_python>" not in text:
        text = f"<execute_python>\n{text}\n</execute_python>"
    return text


def encode_image_b64(path: str) -> tuple[str, str]:
    """Return (media_type, base64_str) for an image file path."""
    mime, _ = mimetypes.guess_type(path)
    media_type = mime or "image/png"
    with open(path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf-8")
    return media_type, b64


def image_anthropic_call(model_name: str, prompt: str, media_type: str, b64: str) -> str:
    """
    Call Anthropic Claude (messages.create) with text+image and return *all* text blocks concatenated.
    Adds a system message to enforce strict JSON output.
    """
    if not anthropic_api_key:
        raise ValueError("ANTHROPIC_API_KEY not set. Please add it as a secret in HuggingFace Spaces settings.")
    msg = anthropic_client.messages.create(
        model=model_name,
        max_tokens=2000,
        temperature=0,
        system=(
            "You are a careful assistant. Respond with a single valid JSON object only. "
            "Do not include markdown, code fences, or commentary outside JSON."
        ),
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image", "source": {"type": "base64", "media_type": media_type, "data": b64}},
            ],
        }],
    )
    # Anthropic returns a list of content blocks; collect all text blocks
    parts = []
    for block in (msg.content or []):
        if getattr(block, "type", None) == "text":
            parts.append(block.text)
    return "".join(parts).strip()


def image_openai_call(model_name: str, prompt: str, media_type: str, b64: str) -> str:
    """Call OpenAI with text+image input via the Responses API."""
    if not openai_api_key:
        raise ValueError("OPENAI_API_KEY not set. Please add it as a secret in HuggingFace Spaces settings.")
    data_url = f"data:{media_type};base64,{b64}"
    resp = openai_client.responses.create(
        model=model_name,
        input=[
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": prompt},
                    {"type": "input_image", "image_url": data_url},
                ],
            }
        ],
    )
    content = (resp.output_text or "").strip()
    return content
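

# === Example Usage (illustrative sketch) ===
# The block below is an assumption about how these helpers fit together, not
# part of the original module: the file names ("sales.csv", "chart.png") and
# model ids ("gpt-4o", "claude-3-5-sonnet-latest") are hypothetical placeholders.
if __name__ == "__main__":
    # Text path: describe the data, ask a model for plotting code, normalize the tags.
    df = load_and_prepare_data("sales.csv")  # hypothetical sample CSV
    schema = make_schema_text(df)
    prompt = (
        "Write pandas + matplotlib code that plots total revenue per quarter.\n"
        f"The DataFrame `df` has the following columns:\n{schema}"
    )
    code = ensure_execute_python_tags(get_response("gpt-4o", prompt))  # model id is an assumption
    print(code)

    # Vision path: send a rendered chart image and request a strict-JSON critique.
    media_type, b64 = encode_image_b64("chart.png")  # hypothetical image file
    print(image_openai_call("gpt-4o", "Describe this chart as JSON.", media_type, b64))
    # The Anthropic variant takes the same arguments:
    # print(image_anthropic_call("claude-3-5-sonnet-latest", "Describe this chart as JSON.", media_type, b64))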