Spaces:

vedkdev
/

FlakyTestSleuthOpenEnvRL

Sleeping

vedkdev committed on Apr 8

Commit

de8fc0f

verified ·

1 Parent(s): f53d90b

Deploy FlakyGym UI + inference updates (minimal upload)

Files changed (1) hide show

inference.py CHANGED Viewed

@@ -431,6 +431,7 @@ def run_episode(
             action: FlakySleuthAction
             action_source = "heuristic"
             llm_meta: dict[str, Any] = {"attempted": False, "raw_output": "", "error": ""}
             try:
                 candidate, llm_meta = llm_action(messages)
                 if candidate is not None:
@@ -474,6 +475,7 @@ def run_episode(
                     reason_key = "empty_or_invalid_response"
                 else:
                     reason_key = "heuristic_default"
                 fallback_reasons[reason_key] = fallback_reasons.get(reason_key, 0) + 1
             if trace_agent and not compliance_stdout:
@@ -526,6 +528,11 @@ def run_episode(
                     step_error = str(raw_err)
             if not step_error and obs.tool_output and str(obs.tool_output).startswith("ERROR:"):
                 step_error = str(obs.tool_output)
             if compliance_stdout:
                 _compliance_log_step(

             action: FlakySleuthAction
             action_source = "heuristic"
             llm_meta: dict[str, Any] = {"attempted": False, "raw_output": "", "error": ""}
+            step_fallback_reason: str | None = None
             try:
                 candidate, llm_meta = llm_action(messages)
                 if candidate is not None:
                     reason_key = "empty_or_invalid_response"
                 else:
                     reason_key = "heuristic_default"
+                step_fallback_reason = reason_key
                 fallback_reasons[reason_key] = fallback_reasons.get(reason_key, 0) + 1
             if trace_agent and not compliance_stdout:
                     step_error = str(raw_err)
             if not step_error and obs.tool_output and str(obs.tool_output).startswith("ERROR:"):
                 step_error = str(obs.tool_output)
+            if step_fallback_reason:
+                if step_error:
+                    step_error = f"{step_error}; llm_fallback:{step_fallback_reason}"
+                else:
+                    step_error = f"llm_fallback:{step_fallback_reason}"
             if compliance_stdout:
                 _compliance_log_step(