Spaces:
Sleeping
Sleeping
Deploy FlakyGym UI + inference updates (minimal upload)
Browse files
- inference.py +7 -0
inference.py
CHANGED
|
@@ -431,6 +431,7 @@ def run_episode(
|
|
| 431 |
action: FlakySleuthAction
|
| 432 |
action_source = "heuristic"
|
| 433 |
llm_meta: dict[str, Any] = {"attempted": False, "raw_output": "", "error": ""}
|
|
|
|
| 434 |
try:
|
| 435 |
candidate, llm_meta = llm_action(messages)
|
| 436 |
if candidate is not None:
|
|
@@ -474,6 +475,7 @@ def run_episode(
|
|
| 474 |
reason_key = "empty_or_invalid_response"
|
| 475 |
else:
|
| 476 |
reason_key = "heuristic_default"
|
|
|
|
| 477 |
fallback_reasons[reason_key] = fallback_reasons.get(reason_key, 0) + 1
|
| 478 |
|
| 479 |
if trace_agent and not compliance_stdout:
|
|
@@ -526,6 +528,11 @@ def run_episode(
|
|
| 526 |
step_error = str(raw_err)
|
| 527 |
if not step_error and obs.tool_output and str(obs.tool_output).startswith("ERROR:"):
|
| 528 |
step_error = str(obs.tool_output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 529 |
|
| 530 |
if compliance_stdout:
|
| 531 |
_compliance_log_step(
|
|
|
|
| 431 |
action: FlakySleuthAction
|
| 432 |
action_source = "heuristic"
|
| 433 |
llm_meta: dict[str, Any] = {"attempted": False, "raw_output": "", "error": ""}
|
| 434 |
+
step_fallback_reason: str | None = None
|
| 435 |
try:
|
| 436 |
candidate, llm_meta = llm_action(messages)
|
| 437 |
if candidate is not None:
|
|
|
|
| 475 |
reason_key = "empty_or_invalid_response"
|
| 476 |
else:
|
| 477 |
reason_key = "heuristic_default"
|
| 478 |
+
step_fallback_reason = reason_key
|
| 479 |
fallback_reasons[reason_key] = fallback_reasons.get(reason_key, 0) + 1
|
| 480 |
|
| 481 |
if trace_agent and not compliance_stdout:
|
|
|
|
| 528 |
step_error = str(raw_err)
|
| 529 |
if not step_error and obs.tool_output and str(obs.tool_output).startswith("ERROR:"):
|
| 530 |
step_error = str(obs.tool_output)
|
| 531 |
+
if step_fallback_reason:
|
| 532 |
+
if step_error:
|
| 533 |
+
step_error = f"{step_error}; llm_fallback:{step_fallback_reason}"
|
| 534 |
+
else:
|
| 535 |
+
step_error = f"llm_fallback:{step_fallback_reason}"
|
| 536 |
|
| 537 |
if compliance_stdout:
|
| 538 |
_compliance_log_step(
|