Omartificial-Intelligence-Space committed on
Commit
83721a5
·
verified ·
1 Parent(s): b5f77f6

Create utils.py

Browse files
Files changed (1) hide show
  1. utils.py +131 -0
utils.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # === Standard Library ===
2
+ import os
3
+ import re
4
+ import json
5
+ import base64
6
+ import mimetypes
7
+ from pathlib import Path
8
+
9
+ # === Third-Party ===
10
+ import pandas as pd
11
+ import matplotlib.pyplot as plt
12
+ from PIL import Image
13
+ from dotenv import load_dotenv
14
+ from openai import OpenAI
15
+ from anthropic import Anthropic
16
+ from html import escape
17
+
18
# === Environment & API clients ===
load_dotenv()  # pull API keys from a local .env file if one exists
openai_api_key = os.getenv("OPENAI_API_KEY")
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")

# Pass the key explicitly when we have it; both SDKs also read env vars on their own.
if openai_api_key:
    openai_client = OpenAI(api_key=openai_api_key)
else:
    openai_client = OpenAI()
if anthropic_api_key:
    anthropic_client = Anthropic(api_key=anthropic_api_key)
else:
    anthropic_client = Anthropic()
26
+
27
+
28
def get_response(model: str, prompt: str) -> str:
    """Send *prompt* to an LLM and return its text reply.

    Routes to Anthropic when the model name mentions "claude"/"anthropic",
    otherwise uses the OpenAI Responses API (gpt-4, o3-mini, o1, ...).
    """
    lowered = model.lower()
    if "claude" not in lowered and "anthropic" not in lowered:
        # OpenAI Responses API path (default for all non-Anthropic models).
        response = openai_client.responses.create(
            model=model,
            input=prompt,
        )
        return response.output_text
    # Anthropic Claude messages path.
    message = anthropic_client.messages.create(
        model=model,
        max_tokens=1000,
        messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
    )
    return message.content[0].text
45
+
46
+
47
# === Data Loading ===
def load_and_prepare_data(csv_path: str) -> pd.DataFrame:
    """Read *csv_path* into a DataFrame and derive chart-friendly date parts.

    When a 'date' column is present it is parsed (unparseable values become
    NaT) and 'quarter', 'month', and 'year' columns are added alongside it.
    """
    frame = pd.read_csv(csv_path)
    if "date" not in frame.columns:
        # Nothing to derive without a date column; hand the data back as-is.
        return frame
    frame["date"] = pd.to_datetime(frame["date"], errors="coerce")
    for part in ("quarter", "month", "year"):
        frame[part] = getattr(frame["date"].dt, part)
    return frame
58
+
59
+
60
# === Helpers ===
def make_schema_text(df: pd.DataFrame) -> str:
    """Describe each column of *df* as '- name: dtype', one column per line."""
    lines = [f"- {col}: {dtype}" for col, dtype in df.dtypes.items()]
    return "\n".join(lines)
64
+
65
+
66
def ensure_execute_python_tags(text: str) -> str:
    """Return *text* wrapped in <execute_python> tags.

    Markdown code fences (``` / ```python) are stripped first; text that
    already carries an <execute_python> tag is returned untouched.
    """
    cleaned = text.strip()
    # Drop a leading ``` / ```python fence and a trailing ``` fence, if any.
    cleaned = re.sub(r"^```(?:python)?\s*|\s*```$", "", cleaned).strip()
    if "<execute_python>" in cleaned:
        return cleaned
    return f"<execute_python>\n{cleaned}\n</execute_python>"
74
+
75
+
76
def encode_image_b64(path: str) -> tuple[str, str]:
    """Return (media_type, base64_str) for the image file at *path*.

    The media type is guessed from the file extension and falls back to
    'image/png' when the extension is unknown.
    """
    guessed, _encoding = mimetypes.guess_type(path)
    media_type = guessed if guessed else "image/png"
    raw = Path(path).read_bytes()
    return media_type, base64.b64encode(raw).decode("utf-8")
83
+
84
+
85
def image_anthropic_call(model_name: str, prompt: str, media_type: str, b64: str) -> str:
    """
    Send a text+image request to Anthropic Claude and return the concatenated
    text of every text block in the reply.

    A system message instructs the model to answer with strict JSON only
    (no markdown, fences, or commentary).
    """
    user_content = [
        {"type": "text", "text": prompt},
        {"type": "image", "source": {"type": "base64", "media_type": media_type, "data": b64}},
    ]
    msg = anthropic_client.messages.create(
        model=model_name,
        max_tokens=2000,
        temperature=0,
        system=(
            "You are a careful assistant. Respond with a single valid JSON object only. "
            "Do not include markdown, code fences, or commentary outside JSON."
        ),
        messages=[{"role": "user", "content": user_content}],
    )

    # The reply is a list of content blocks; keep only the text ones.
    text_parts = [
        block.text
        for block in (msg.content or [])
        if getattr(block, "type", None) == "text"
    ]
    return "".join(text_parts).strip()
113
+
114
+
115
def image_openai_call(model_name: str, prompt: str, media_type: str, b64: str) -> str:
    """Send a text+image request to OpenAI and return the stripped reply text."""
    data_url = f"data:{media_type};base64,{b64}"
    user_message = {
        "role": "user",
        "content": [
            {"type": "input_text", "text": prompt},
            {"type": "input_image", "image_url": data_url},
        ],
    }
    resp = openai_client.responses.create(
        model=model_name,
        input=[user_message],
    )
    return (resp.output_text or "").strip()