import os
import re
import json
import base64
import mimetypes
from html import escape
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from dotenv import load_dotenv
from openai import OpenAI
from anthropic import Anthropic

load_dotenv()

openai_api_key = os.getenv("OPENAI_API_KEY", "").strip()
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()

# Instantiate clients only when a key is present: OpenAI() raises at
# construction time if no API key can be found, which would crash the app on
# import. Every call site below checks the key before touching its client.
openai_client = OpenAI(api_key=openai_api_key) if openai_api_key else None
anthropic_client = Anthropic(api_key=anthropic_api_key) if anthropic_api_key else None


def get_response(model: str, prompt: str) -> str:
    """Get response from LLM (OpenAI or Anthropic)."""
    if "claude" in model.lower() or "anthropic" in model.lower():
        if not anthropic_api_key:
            raise ValueError(
                "ANTHROPIC_API_KEY not set. Please add it as a secret in "
                "HuggingFace Spaces settings."
            )
        message = anthropic_client.messages.create(
            model=model,
            max_tokens=1000,
            messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
        )
        return message.content[0].text
    else:
        if not openai_api_key:
            raise ValueError(
                "OPENAI_API_KEY not set. Please add it as a secret in "
                "HuggingFace Spaces settings."
            )
        response = openai_client.responses.create(
            model=model,
            input=prompt,
        )
        return response.output_text
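

# Usage sketch (model names are illustrative; routing is by substring match
# on "claude"/"anthropic" in the model name):
#   get_response("claude-3-5-sonnet-latest", "Name three chart types.")
#   get_response("gpt-4o-mini", "Name three chart types.")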


def load_and_prepare_data(csv_path: str) -> pd.DataFrame:
    """Load CSV and derive date parts commonly used in charts."""
    df = pd.read_csv(csv_path)
    if "date" in df.columns:
        df["date"] = pd.to_datetime(df["date"], errors="coerce")
        df["quarter"] = df["date"].dt.quarter
        df["month"] = df["date"].dt.month
        df["year"] = df["date"].dt.year
    return df
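

# Usage sketch (assumes a CSV with a "date" column; the file name is
# hypothetical):
#   df = load_and_prepare_data("sales.csv")
#   df[["date", "year", "quarter", "month"]].head()
# Unparseable dates become NaT (errors="coerce"), so their derived parts are
# NaN rather than raising.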


def make_schema_text(df: pd.DataFrame) -> str:
    """Return a human-readable schema from a DataFrame."""
    return "\n".join(f"- {c}: {dt}" for c, dt in df.dtypes.items())
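

# Example output for a frame with a parsed "date" column and a numeric
# "revenue" column:
#   - date: datetime64[ns]
#   - revenue: float64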


def ensure_execute_python_tags(text: str) -> str:
    """Normalize code to be wrapped in <execute_python>...</execute_python>."""
    text = text.strip()
    # Strip a leading/trailing Markdown code fence if the model added one.
    text = re.sub(r"^```(?:python)?\s*|\s*```$", "", text).strip()
    if "<execute_python>" not in text:
        text = f"<execute_python>\n{text}\n</execute_python>"
    return text
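

# Example: a fenced reply is unwrapped, then wrapped in execute tags.
#   ensure_execute_python_tags("```python\nprint('hi')\n```")
#   # -> "<execute_python>\nprint('hi')\n</execute_python>"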


def encode_image_b64(path: str) -> tuple[str, str]:
    """Return (media_type, base64_str) for an image file path."""
    mime, _ = mimetypes.guess_type(path)
    media_type = mime or "image/png"
    with open(path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf-8")
    return media_type, b64
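

# Usage sketch (file name illustrative; unknown extensions fall back to
# image/png):
#   media_type, b64 = encode_image_b64("chart.png")  # -> ("image/png", "iVBOR...")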


def image_anthropic_call(model_name: str, prompt: str, media_type: str, b64: str) -> str:
    """
    Call Anthropic Claude (messages.create) with text+image and return *all*
    text blocks concatenated. Adds a system message to enforce strict JSON
    output.
    """
    if not anthropic_api_key:
        raise ValueError(
            "ANTHROPIC_API_KEY not set. Please add it as a secret in "
            "HuggingFace Spaces settings."
        )
    msg = anthropic_client.messages.create(
        model=model_name,
        max_tokens=2000,
        temperature=0,
        system=(
            "You are a careful assistant. Respond with a single valid JSON object only. "
            "Do not include markdown, code fences, or commentary outside JSON."
        ),
        messages=[{
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image", "source": {"type": "base64", "media_type": media_type, "data": b64}},
            ],
        }],
    )
    # Concatenate every text block; a reply may contain more than one.
    parts = []
    for block in (msg.content or []):
        if getattr(block, "type", None) == "text":
            parts.append(block.text)
    return "".join(parts).strip()
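

# Usage sketch (model name illustrative; the reply should be strict JSON
# thanks to the system message above):
#   media_type, b64 = encode_image_b64("chart.png")
#   raw = image_anthropic_call("claude-3-5-sonnet-latest", "Describe this chart as JSON.", media_type, b64)
#   data = json.loads(raw)  # raises json.JSONDecodeError if the model strayed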


def image_openai_call(model_name: str, prompt: str, media_type: str, b64: str) -> str:
    """Call OpenAI with text+image input."""
    if not openai_api_key:
        raise ValueError(
            "OPENAI_API_KEY not set. Please add it as a secret in "
            "HuggingFace Spaces settings."
        )
    data_url = f"data:{media_type};base64,{b64}"
    resp = openai_client.responses.create(
        model=model_name,
        input=[
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": prompt},
                    {"type": "input_image", "image_url": data_url},
                ],
            }
        ],
    )
    return (resp.output_text or "").strip()
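

# Usage sketch, mirroring the Anthropic path (model name illustrative):
#   media_type, b64 = encode_image_b64("chart.png")
#   raw = image_openai_call("gpt-4o-mini", "Describe this chart as JSON.", media_type, b64)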