Verdiola committed
Commit 9aa17d8 · verified · 1 Parent(s): 40f773e

try to fix gpt-5-mini

Files changed (1): app/main.py +55 -6
app/main.py CHANGED
@@ -34,6 +34,32 @@ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 USE_OPENAI = os.getenv("USE_OPENAI", "").lower() == "true"
 PORT = int(os.getenv("PORT", "7860"))
 
+_OPENAI_RESPONSES_MODELS_ENV = os.getenv("OPENAI_RESPONSES_MODELS", "")
+RESPONSES_API_MODEL_NAMES = {"gpt-5-mini"}
+if _OPENAI_RESPONSES_MODELS_ENV:
+    RESPONSES_API_MODEL_NAMES.update(
+        model.strip().lower()
+        for model in _OPENAI_RESPONSES_MODELS_ENV.split(",")
+        if model.strip()
+    )
+
+_OPENAI_RESPONSES_PREFIXES_ENV = os.getenv("OPENAI_RESPONSES_PREFIXES", "")
+_RESPONSES_API_MODEL_PREFIXES = ["gpt-5"]
+if _OPENAI_RESPONSES_PREFIXES_ENV:
+    _RESPONSES_API_MODEL_PREFIXES.extend(
+        prefix.strip().lower()
+        for prefix in _OPENAI_RESPONSES_PREFIXES_ENV.split(",")
+        if prefix.strip()
+    )
+RESPONSES_API_MODEL_PREFIXES = tuple(_RESPONSES_API_MODEL_PREFIXES)
+RESPONSES_API_ERROR_HINTS = (
+    "only supported in v1/responses",
+    "use the responses api",
+    "use the responses endpoint",
+    "please call the responses api",
+    "please use the responses endpoint",
+)
+
 gemini_client = genai.Client(api_key=API_KEY) if API_KEY else None
 gpt_client = OpenAI(api_key=OPENAI_API_KEY) if (OPENAI_API_KEY and OpenAI and USE_OPENAI) else None
 
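The hunk above makes the Responses-API routing configurable at import time: "gpt-5-mini" and the "gpt-5" prefix are built in, and the comma-separated OPENAI_RESPONSES_MODELS and OPENAI_RESPONSES_PREFIXES environment variables extend the match sets. A minimal standalone sketch of the parsing behavior (hypothetical values, same trim/lowercase/skip-empty logic as the hunk):

import os

# Hypothetical value; the commit reads the variable once at import time.
os.environ["OPENAI_RESPONSES_MODELS"] = "o4-mini, Custom-Model, "

names = {"gpt-5-mini"}  # built-in default from the hunk
raw = os.getenv("OPENAI_RESPONSES_MODELS", "")
if raw:
    # Entries are trimmed and lowercased; empty items are dropped.
    names.update(m.strip().lower() for m in raw.split(",") if m.strip())

print(sorted(names))  # ['custom-model', 'gpt-5-mini', 'o4-mini']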
@@ -162,6 +188,21 @@ def _build_responses_input(system: str, contents: Any) -> List[Dict[str, Any]]:
         {"role": "user", "content": _build_openai_content(contents, for_chat=False)},
     ]
 
+def _requires_responses_api(model: str) -> bool:
+    lowered = (model or "").lower()
+    if not lowered:
+        return False
+    if lowered in RESPONSES_API_MODEL_NAMES:
+        return True
+    return any(
+        prefix and lowered.startswith(prefix)
+        for prefix in RESPONSES_API_MODEL_PREFIXES
+    )
+
+
+def _should_use_responses_fallback(err: Exception) -> bool:
+    message = str(err).lower()
+    return any(hint in message for hint in RESPONSES_API_ERROR_HINTS)
 
 def _extract_chat_content(resp: Any) -> str:
     content = resp.choices[0].message.content
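These two helpers split the routing decision: _requires_responses_api proactively matches model names against the configured set and prefixes, while _should_use_responses_fallback reactively recognizes an error message that points at the Responses endpoint. A quick behavioral check (assuming the patched module imports cleanly as app.main with the default configuration):

from app.main import _requires_responses_api, _should_use_responses_fallback

# Proactive: exact name or a "gpt-5" prefix, matched case-insensitively.
assert _requires_responses_api("gpt-5-mini")
assert _requires_responses_api("GPT-5-nano")
assert not _requires_responses_api("gpt-4o-mini")
assert not _requires_responses_api("")

# Reactive: any configured hint substring in the lowercased error text.
assert _should_use_responses_fallback(
    RuntimeError("This model is only supported in v1/responses")
)
assert not _should_use_responses_fallback(ValueError("rate limit exceeded"))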
@@ -199,17 +240,22 @@ def _invoke_gpt_model(model: str, system: str, contents: Any) -> str:
     if not gpt_client:
         raise RuntimeError("GPT client is not configured")
     messages = _build_chat_messages(system, contents)
+    print(f"[Storyboard] OpenAI request model={model}", file=sys.stderr)
+    responses_input: Optional[List[Dict[str, Any]]] = None
+    if _requires_responses_api(model):
+        responses_input = _build_responses_input(system, contents)
+        resp = gpt_client.responses.create(model=model, input=responses_input)
+        return _extract_responses_content(resp)
     try:
         resp = gpt_client.chat.completions.create(model=model, messages=messages)
         return _extract_chat_content(resp)
     except Exception as err:
-        message = str(err)
-        if "only supported in v1/responses" not in message:
+        if not _should_use_responses_fallback(err):
             raise
-        resp = gpt_client.responses.create(
-            model=model,
-            input=_build_responses_input(system, contents),
-        )
+        if responses_input is None:
+            responses_input = _build_responses_input(system, contents)
+        print(f"[Storyboard] OpenAI responses fallback model={model}", file=sys.stderr)
+        resp = gpt_client.responses.create(model=model, input=responses_input)
         return _extract_responses_content(resp)
 
 
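The rewritten _invoke_gpt_model now has three paths: known Responses-only models skip chat completions entirely, everything else tries chat first, and the Responses retry fires only when the error text matches a hint. A condensed sketch of that control flow against a hypothetical stub client (not the real OpenAI SDK):

class StubClient:
    """Hypothetical stand-in; chat rejects gpt-5 models the way the live API does."""

    class chat:
        class completions:
            @staticmethod
            def create(*, model, messages):
                if model.startswith("gpt-5"):
                    raise RuntimeError("This model is only supported in v1/responses")
                return f"chat:{model}"

    class responses:
        @staticmethod
        def create(*, model, input):
            return f"responses:{model}"


def invoke(client, model):
    # Mirrors the hunk's ordering: known Responses models never touch chat.
    if model.startswith("gpt-5"):  # stands in for _requires_responses_api(model)
        return client.responses.create(model=model, input=[])
    try:
        return client.chat.completions.create(model=model, messages=[])
    except RuntimeError as err:
        # Stands in for _should_use_responses_fallback(err).
        if "only supported in v1/responses" not in str(err).lower():
            raise
        return client.responses.create(model=model, input=[])


assert invoke(StubClient, "gpt-5-mini") == "responses:gpt-5-mini"
assert invoke(StubClient, "gpt-4o-mini") == "chat:gpt-4o-mini"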
@@ -218,6 +264,7 @@ def gemini_call(*, system: str, contents):
     if not gemini_client:
         raise RuntimeError("Gemini client is not configured")
     limiter.acquire()
+    print(f"[Storyboard] Gemini request model={MODEL}", file=sys.stderr)
     resp = gemini_client.models.generate_content(
         model=MODEL,
         config=types.GenerateContentConfig(system_instruction=system),
@@ -231,10 +278,12 @@ def gemini_small_call(*, system: str, contents: str) -> str:
     storyboard_limiter.acquire()
     if gpt_client:
         target_model = OPENAI_SMALL_MODEL
+        print(f"[Storyboard] Using OpenAI small model={target_model}", file=sys.stderr)
         return _invoke_gpt_model(target_model, system, contents)
     if not gemini_client:
         raise RuntimeError("Gemini client is not configured")
     fallback_model = GEMINI_SMALL_MODEL or MODEL
+    print(f"[Storyboard] OpenAI disabled; using Gemini fallback model={fallback_model}", file=sys.stderr)
     resp = gemini_client.models.generate_content(
         model=fallback_model,
         config=types.GenerateContentConfig(system_instruction=system),
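End to end, the small-model path is now steered by the environment: gpt_client is only built when USE_OPENAI is "true" and an OPENAI_API_KEY is present, and the Responses routing honors the two new variables. A hypothetical configuration (illustrative values; the key is a placeholder) that exercises the new path:

import os

# Hypothetical values; set these before app/main.py is imported.
os.environ.update({
    "USE_OPENAI": "true",                  # required for gpt_client to be built
    "OPENAI_API_KEY": "sk-placeholder",    # supply a real key
    "OPENAI_RESPONSES_MODELS": "o4-mini",  # extra exact-name matches (assumed value)
    "OPENAI_RESPONSES_PREFIXES": "o4",     # extra prefix matches (assumed value)
})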
 