shank committed on
Commit ·
0181886
1
Parent(s): 212d2d9
Fix: Formatted the output
Browse files- .gitignore +2 -0
- inference.py +12 -1
.gitignore
CHANGED
|
@@ -44,3 +44,5 @@ baseline_results.json
|
|
| 44 |
# Temp sandbox files (should clean up but just in case)
|
| 45 |
sandbox_*.py
|
| 46 |
/tmp/sandbox_*
|
|
|
|
|
|
|
|
|
| 44 |
# Temp sandbox files (should clean up but just in case)
|
| 45 |
sandbox_*.py
|
| 46 |
/tmp/sandbox_*
|
| 47 |
+
|
| 48 |
+
instructions.md
|
inference.py
CHANGED
|
@@ -170,6 +170,9 @@ def run_episode(task_id: str) -> dict:
|
|
| 170 |
reset_resp.raise_for_status()
|
| 171 |
obs = reset_resp.json()
|
| 172 |
|
|
|
|
|
|
|
|
|
|
| 173 |
messages = [
|
| 174 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 175 |
{"role": "user", "content": build_initial_message(obs)}
|
|
@@ -211,6 +214,9 @@ def run_episode(task_id: str) -> dict:
|
|
| 211 |
info = result["info"]
|
| 212 |
last_result = result
|
| 213 |
|
|
|
|
|
|
|
|
|
|
| 214 |
# Build context for next LLM call
|
| 215 |
step_msg = build_step_message(obs, reward, info)
|
| 216 |
messages.append({"role": "assistant", "content": raw})
|
|
@@ -220,7 +226,7 @@ def run_episode(task_id: str) -> dict:
|
|
| 220 |
break
|
| 221 |
|
| 222 |
final_obs = last_result["observation"]
|
| 223 |
-
|
| 224 |
"task_id": task_id,
|
| 225 |
"grader_score": last_result["reward"]["grader_score"],
|
| 226 |
"cumulative_reward": last_result["reward"]["cumulative_reward"],
|
|
@@ -232,6 +238,11 @@ def run_episode(task_id: str) -> dict:
|
|
| 232 |
"final_action_type": action.get("action_type", "unknown")
|
| 233 |
}
|
| 234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
|
| 236 |
def main():
|
| 237 |
print("AgentDebuggerEnv — Baseline Inference")
|
|
|
|
| 170 |
reset_resp.raise_for_status()
|
| 171 |
obs = reset_resp.json()
|
| 172 |
|
| 173 |
+
# [START] task=NAME
|
| 174 |
+
print(f"[START] task={task_id}", flush=True)
|
| 175 |
+
|
| 176 |
messages = [
|
| 177 |
{"role": "system", "content": SYSTEM_PROMPT},
|
| 178 |
{"role": "user", "content": build_initial_message(obs)}
|
|
|
|
| 214 |
info = result["info"]
|
| 215 |
last_result = result
|
| 216 |
|
| 217 |
+
# [STEP] step=N reward=R
|
| 218 |
+
print(f"[STEP] step={obs['step_number']} reward={reward['step_reward']}", flush=True)
|
| 219 |
+
|
| 220 |
# Build context for next LLM call
|
| 221 |
step_msg = build_step_message(obs, reward, info)
|
| 222 |
messages.append({"role": "assistant", "content": raw})
|
|
|
|
| 226 |
break
|
| 227 |
|
| 228 |
final_obs = last_result["observation"]
|
| 229 |
+
result = {
|
| 230 |
"task_id": task_id,
|
| 231 |
"grader_score": last_result["reward"]["grader_score"],
|
| 232 |
"cumulative_reward": last_result["reward"]["cumulative_reward"],
|
|
|
|
| 238 |
"final_action_type": action.get("action_type", "unknown")
|
| 239 |
}
|
| 240 |
|
| 241 |
+
# [END] task=NAME score=S steps=N
|
| 242 |
+
print(f"[END] task={task_id} score={result['grader_score']} steps={result['steps_taken']}", flush=True)
|
| 243 |
+
|
| 244 |
+
return result
|
| 245 |
+
|
| 246 |
|
| 247 |
def main():
|
| 248 |
print("AgentDebuggerEnv — Baseline Inference")
|