Spaces:
Sleeping
Sleeping
try to fix gpt-5-mini
Browse files- app/main.py +55 -6
app/main.py
CHANGED
|
@@ -34,6 +34,32 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
|
| 34 |
USE_OPENAI = os.getenv("USE_OPENAI", "").lower() == "true"
|
| 35 |
PORT = int(os.getenv("PORT", "7860"))
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
gemini_client = genai.Client(api_key=API_KEY) if API_KEY else None
|
| 38 |
gpt_client = OpenAI(api_key=OPENAI_API_KEY) if (OPENAI_API_KEY and OpenAI and USE_OPENAI) else None
|
| 39 |
|
|
@@ -162,6 +188,21 @@ def _build_responses_input(system: str, contents: Any) -> List[Dict[str, Any]]:
|
|
| 162 |
{"role": "user", "content": _build_openai_content(contents, for_chat=False)},
|
| 163 |
]
|
| 164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
def _extract_chat_content(resp: Any) -> str:
|
| 167 |
content = resp.choices[0].message.content
|
|
@@ -199,17 +240,22 @@ def _invoke_gpt_model(model: str, system: str, contents: Any) -> str:
|
|
| 199 |
if not gpt_client:
|
| 200 |
raise RuntimeError("GPT client is not configured")
|
| 201 |
messages = _build_chat_messages(system, contents)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
try:
|
| 203 |
resp = gpt_client.chat.completions.create(model=model, messages=messages)
|
| 204 |
return _extract_chat_content(resp)
|
| 205 |
except Exception as err:
|
| 206 |
-
|
| 207 |
-
if "only supported in v1/responses" not in message:
|
| 208 |
raise
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
)
|
| 213 |
return _extract_responses_content(resp)
|
| 214 |
|
| 215 |
|
|
@@ -218,6 +264,7 @@ def gemini_call(*, system: str, contents):
|
|
| 218 |
if not gemini_client:
|
| 219 |
raise RuntimeError("Gemini client is not configured")
|
| 220 |
limiter.acquire()
|
|
|
|
| 221 |
resp = gemini_client.models.generate_content(
|
| 222 |
model=MODEL,
|
| 223 |
config=types.GenerateContentConfig(system_instruction=system),
|
|
@@ -231,10 +278,12 @@ def gemini_small_call(*, system: str, contents: str) -> str:
|
|
| 231 |
storyboard_limiter.acquire()
|
| 232 |
if gpt_client:
|
| 233 |
target_model = OPENAI_SMALL_MODEL
|
|
|
|
| 234 |
return _invoke_gpt_model(target_model, system, contents)
|
| 235 |
if not gemini_client:
|
| 236 |
raise RuntimeError("Gemini client is not configured")
|
| 237 |
fallback_model = GEMINI_SMALL_MODEL or MODEL
|
|
|
|
| 238 |
resp = gemini_client.models.generate_content(
|
| 239 |
model=fallback_model,
|
| 240 |
config=types.GenerateContentConfig(system_instruction=system),
|
|
|
|
| 34 |
USE_OPENAI = os.getenv("USE_OPENAI", "").lower() == "true"
|
| 35 |
PORT = int(os.getenv("PORT", "7860"))
|
| 36 |
|
| 37 |
+
_OPENAI_RESPONSES_MODELS_ENV = os.getenv("OPENAI_RESPONSES_MODELS", "")
|
| 38 |
+
RESPONSES_API_MODEL_NAMES = {"gpt-5-mini"}
|
| 39 |
+
if _OPENAI_RESPONSES_MODELS_ENV:
|
| 40 |
+
RESPONSES_API_MODEL_NAMES.update(
|
| 41 |
+
model.strip().lower()
|
| 42 |
+
for model in _OPENAI_RESPONSES_MODELS_ENV.split(",")
|
| 43 |
+
if model.strip()
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
_OPENAI_RESPONSES_PREFIXES_ENV = os.getenv("OPENAI_RESPONSES_PREFIXES", "")
|
| 47 |
+
_RESPONSES_API_MODEL_PREFIXES = ["gpt-5"]
|
| 48 |
+
if _OPENAI_RESPONSES_PREFIXES_ENV:
|
| 49 |
+
_RESPONSES_API_MODEL_PREFIXES.extend(
|
| 50 |
+
prefix.strip().lower()
|
| 51 |
+
for prefix in _OPENAI_RESPONSES_PREFIXES_ENV.split(",")
|
| 52 |
+
if prefix.strip()
|
| 53 |
+
)
|
| 54 |
+
RESPONSES_API_MODEL_PREFIXES = tuple(_RESPONSES_API_MODEL_PREFIXES)
|
| 55 |
+
RESPONSES_API_ERROR_HINTS = (
|
| 56 |
+
"only supported in v1/responses",
|
| 57 |
+
"use the responses api",
|
| 58 |
+
"use the responses endpoint",
|
| 59 |
+
"please call the responses api",
|
| 60 |
+
"please use the responses endpoint",
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
gemini_client = genai.Client(api_key=API_KEY) if API_KEY else None
|
| 64 |
gpt_client = OpenAI(api_key=OPENAI_API_KEY) if (OPENAI_API_KEY and OpenAI and USE_OPENAI) else None
|
| 65 |
|
|
|
|
| 188 |
{"role": "user", "content": _build_openai_content(contents, for_chat=False)},
|
| 189 |
]
|
| 190 |
|
| 191 |
+
def _requires_responses_api(model: str) -> bool:
|
| 192 |
+
lowered = (model or "").lower()
|
| 193 |
+
if not lowered:
|
| 194 |
+
return False
|
| 195 |
+
if lowered in RESPONSES_API_MODEL_NAMES:
|
| 196 |
+
return True
|
| 197 |
+
return any(
|
| 198 |
+
prefix and lowered.startswith(prefix)
|
| 199 |
+
for prefix in RESPONSES_API_MODEL_PREFIXES
|
| 200 |
+
)
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
def _should_use_responses_fallback(err: Exception) -> bool:
|
| 204 |
+
message = str(err).lower()
|
| 205 |
+
return any(hint in message for hint in RESPONSES_API_ERROR_HINTS)
|
| 206 |
|
| 207 |
def _extract_chat_content(resp: Any) -> str:
|
| 208 |
content = resp.choices[0].message.content
|
|
|
|
| 240 |
if not gpt_client:
|
| 241 |
raise RuntimeError("GPT client is not configured")
|
| 242 |
messages = _build_chat_messages(system, contents)
|
| 243 |
+
print(f"[Storyboard] OpenAI request model={model}", file=sys.stderr)
|
| 244 |
+
responses_input: Optional[List[Dict[str, Any]]] = None
|
| 245 |
+
if _requires_responses_api(model):
|
| 246 |
+
responses_input = _build_responses_input(system, contents)
|
| 247 |
+
resp = gpt_client.responses.create(model=model, input=responses_input)
|
| 248 |
+
return _extract_responses_content(resp)
|
| 249 |
try:
|
| 250 |
resp = gpt_client.chat.completions.create(model=model, messages=messages)
|
| 251 |
return _extract_chat_content(resp)
|
| 252 |
except Exception as err:
|
| 253 |
+
if not _should_use_responses_fallback(err):
|
|
|
|
| 254 |
raise
|
| 255 |
+
if responses_input is None:
|
| 256 |
+
responses_input = _build_responses_input(system, contents)
|
| 257 |
+
print(f"[Storyboard] OpenAI responses fallback model={model}", file=sys.stderr)
|
| 258 |
+
resp = gpt_client.responses.create(model=model, input=responses_input)
|
| 259 |
return _extract_responses_content(resp)
|
| 260 |
|
| 261 |
|
|
|
|
| 264 |
if not gemini_client:
|
| 265 |
raise RuntimeError("Gemini client is not configured")
|
| 266 |
limiter.acquire()
|
| 267 |
+
print(f"[Storyboard] Gemini request model={MODEL}", file=sys.stderr)
|
| 268 |
resp = gemini_client.models.generate_content(
|
| 269 |
model=MODEL,
|
| 270 |
config=types.GenerateContentConfig(system_instruction=system),
|
|
|
|
| 278 |
storyboard_limiter.acquire()
|
| 279 |
if gpt_client:
|
| 280 |
target_model = OPENAI_SMALL_MODEL
|
| 281 |
+
print(f"[Storyboard] Using OpenAI small model={target_model}", file=sys.stderr)
|
| 282 |
return _invoke_gpt_model(target_model, system, contents)
|
| 283 |
if not gemini_client:
|
| 284 |
raise RuntimeError("Gemini client is not configured")
|
| 285 |
fallback_model = GEMINI_SMALL_MODEL or MODEL
|
| 286 |
+
print(f"[Storyboard] OpenAI disabled; using Gemini fallback model={fallback_model}", file=sys.stderr)
|
| 287 |
resp = gemini_client.models.generate_content(
|
| 288 |
model=fallback_model,
|
| 289 |
config=types.GenerateContentConfig(system_instruction=system),
|