File size: 5,175 Bytes
83721a5 914c532 83721a5 914c532 83721a5 914c532 83721a5 914c532 83721a5 914c532 83721a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
# === Standard Library ===
import os
import re
import json
import base64
import mimetypes
from pathlib import Path
# === Third-Party ===
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from dotenv import load_dotenv
from openai import OpenAI
from anthropic import Anthropic
from html import escape
# === Env & Clients ===
# Load variables from a local .env file (python-dotenv) before the keys are
# read from the environment below.
load_dotenv()
# Strip whitespace/newlines from API keys (common issue with env vars).
# An unset variable becomes "" here, which is what the `if not ..._api_key`
# guards in the call helpers test for.
openai_api_key = os.getenv("OPENAI_API_KEY", "").strip()
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
# Both clients read keys from env by default; explicit is also fine:
# the cleaned key is passed explicitly when non-empty, otherwise the SDK's
# no-argument constructor is used.
# NOTE(review): the no-argument constructors resolve the key themselves and,
# depending on SDK version, may raise at import time when it is missing —
# that would pre-empt the friendlier ValueError raised by the helpers. Confirm.
openai_client = OpenAI(api_key=openai_api_key) if openai_api_key else OpenAI()
anthropic_client = Anthropic(api_key=anthropic_api_key) if anthropic_api_key else Anthropic()
def get_response(model: str, prompt: str) -> str:
    """Send a text-only prompt to an LLM and return the text of its reply.

    Routing is by model name: a name containing "claude" or "anthropic"
    (case-insensitive) is sent to the Anthropic Messages API; every other
    name (gpt-4, o3-mini, o1, etc.) is sent to the OpenAI Responses API.

    Args:
        model: Model identifier, passed through to the provider unchanged.
        prompt: User prompt text.

    Returns:
        The reply text. For Anthropic, every text content block is
        concatenated in order; an empty string is returned when the reply
        carries no text.

    Raises:
        ValueError: If the API key for the selected provider is not set.
    """
    model_lc = model.lower()
    if "claude" in model_lc or "anthropic" in model_lc:
        # Check if Anthropic API key is available
        if not anthropic_api_key:
            raise ValueError("ANTHROPIC_API_KEY not set. Please add it as a secret in HuggingFace Spaces settings.")
        # Anthropic Claude format
        message = anthropic_client.messages.create(
            model=model,
            max_tokens=1000,
            messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
        )
        # The reply is a list of content blocks. Indexing [0] fails on an
        # empty list and on a non-text first block, so gather every text
        # block instead (same approach as image_anthropic_call).
        return "".join(
            block.text
            for block in (message.content or [])
            if getattr(block, "type", None) == "text"
        )

    # Check if OpenAI API key is available
    if not openai_api_key:
        raise ValueError("OPENAI_API_KEY not set. Please add it as a secret in HuggingFace Spaces settings.")
    # Default to OpenAI format for all other models (gpt-4, o3-mini, o1, etc.)
    response = openai_client.responses.create(
        model=model,
        input=prompt,
    )
    # Keep the declared `str` return type even if the SDK yields nothing.
    return response.output_text or ""
# === Data Loading ===
def load_and_prepare_data(csv_path: str) -> pd.DataFrame:
    """Read a CSV file and, when it has a 'date' column, add date-part columns.

    If a 'date' column is present it is parsed to datetimes (unparseable
    values are coerced rather than raising) and 'quarter', 'month' and
    'year' columns are derived from it. Files without a 'date' column are
    returned exactly as read.
    """
    frame = pd.read_csv(csv_path)

    # Nothing to derive when the file carries no 'date' column.
    if "date" not in frame.columns:
        return frame

    frame["date"] = pd.to_datetime(frame["date"], errors="coerce")
    for part in ("quarter", "month", "year"):
        frame[part] = getattr(frame["date"].dt, part)
    return frame
# === Helpers ===
def make_schema_text(df: pd.DataFrame) -> str:
    """Describe a DataFrame's columns as text, one '- name: dtype' line each."""
    lines = []
    for column, dtype in df.dtypes.items():
        lines.append(f"- {column}: {dtype}")
    return "\n".join(lines)
def ensure_execute_python_tags(text: str) -> str:
    """Normalize code to be wrapped in <execute_python>...</execute_python>.

    Surrounding whitespace and a surrounding Markdown code fence are removed
    first. The opening fence may carry any info string that ends its line
    (```python, ```py, ```Python, ```python3, or none), so the language tag
    never leaks into the code. If the result does not already contain an
    <execute_python> tag, it is wrapped in one.

    Args:
        text: Raw model output, possibly fenced and/or already tagged.

    Returns:
        The cleaned text, guaranteed to contain an <execute_python> tag.
    """
    text = text.strip()
    # Opening fence: prefer "info string up to end of line" so tags such as
    # `py` or `Python` are consumed; fall back to a literal `python` for the
    # legacy same-line form ("```python print(1)```"). Closing fence: trailing
    # whitespace plus ``` at the very end.
    fence = r"^```(?:[\w+.#-]*[ \t]*\r?\n|python)?\s*|\s*```$"
    text = re.sub(fence, "", text).strip()
    if "<execute_python>" not in text:
        text = f"<execute_python>\n{text}\n</execute_python>"
    return text
def encode_image_b64(path: str) -> tuple[str, str]:
    """Read an image file and return it as ``(media_type, base64_text)``.

    The media type is guessed from the file name; when it cannot be guessed
    the fallback is "image/png". The file's bytes are base64-encoded and
    returned as a UTF-8 string.
    """
    guessed = mimetypes.guess_type(path)[0]
    with open(path, "rb") as handle:
        raw = handle.read()
    encoded = base64.b64encode(raw).decode("utf-8")
    return (guessed if guessed else "image/png"), encoded
def image_anthropic_call(model_name: str, prompt: str, media_type: str, b64: str) -> str:
    """Send a text + image prompt to Anthropic Claude and return the reply text.

    The request goes through ``messages.create`` with a system prompt that
    instructs the model to answer with a single JSON object only. All text
    blocks of the reply are concatenated and the result is stripped.

    Raises:
        ValueError: If ANTHROPIC_API_KEY is not set.
    """
    if not anthropic_api_key:
        raise ValueError("ANTHROPIC_API_KEY not set. Please add it as a secret in HuggingFace Spaces settings.")

    json_only_system = (
        "You are a careful assistant. Respond with a single valid JSON object only. "
        "Do not include markdown, code fences, or commentary outside JSON."
    )
    text_part = {"type": "text", "text": prompt}
    image_part = {
        "type": "image",
        "source": {"type": "base64", "media_type": media_type, "data": b64},
    }

    reply = anthropic_client.messages.create(
        model=model_name,
        max_tokens=2000,
        temperature=0,
        system=json_only_system,
        messages=[{"role": "user", "content": [text_part, image_part]}],
    )

    # The reply is a list of content blocks; keep only the text ones.
    texts = [
        blk.text
        for blk in (reply.content or [])
        if getattr(blk, "type", None) == "text"
    ]
    return "".join(texts).strip()
def image_openai_call(model_name: str, prompt: str, media_type: str, b64: str) -> str:
    """Send a text + image prompt to OpenAI and return the stripped reply text.

    The image is passed inline as a ``data:`` URL built from ``media_type``
    and the base64 payload. An empty string is returned when the response
    has no output text.

    Raises:
        ValueError: If OPENAI_API_KEY is not set.
    """
    if not openai_api_key:
        raise ValueError("OPENAI_API_KEY not set. Please add it as a secret in HuggingFace Spaces settings.")

    user_message = {
        "role": "user",
        "content": [
            {"type": "input_text", "text": prompt},
            {"type": "input_image", "image_url": f"data:{media_type};base64,{b64}"},
        ],
    }
    result = openai_client.responses.create(model=model_name, input=[user_message])

    output = result.output_text
    return output.strip() if output else ""
|