shank committed on
Commit
0181886
·
1 Parent(s): 212d2d9

Fix: Formatted the output

Browse files
Files changed (2) hide show
  1. .gitignore +2 -0
  2. inference.py +12 -1
.gitignore CHANGED
@@ -44,3 +44,5 @@ baseline_results.json
44
  # Temp sandbox files (should clean up but just in case)
45
  sandbox_*.py
46
  /tmp/sandbox_*
 
 
 
44
  # Temp sandbox files (should clean up but just in case)
45
  sandbox_*.py
46
  /tmp/sandbox_*
47
+
48
+ instructions.md
inference.py CHANGED
@@ -170,6 +170,9 @@ def run_episode(task_id: str) -> dict:
170
  reset_resp.raise_for_status()
171
  obs = reset_resp.json()
172
 
 
 
 
173
  messages = [
174
  {"role": "system", "content": SYSTEM_PROMPT},
175
  {"role": "user", "content": build_initial_message(obs)}
@@ -211,6 +214,9 @@ def run_episode(task_id: str) -> dict:
211
  info = result["info"]
212
  last_result = result
213
 
 
 
 
214
  # Build context for next LLM call
215
  step_msg = build_step_message(obs, reward, info)
216
  messages.append({"role": "assistant", "content": raw})
@@ -220,7 +226,7 @@ def run_episode(task_id: str) -> dict:
220
  break
221
 
222
  final_obs = last_result["observation"]
223
- return {
224
  "task_id": task_id,
225
  "grader_score": last_result["reward"]["grader_score"],
226
  "cumulative_reward": last_result["reward"]["cumulative_reward"],
@@ -232,6 +238,11 @@ def run_episode(task_id: str) -> dict:
232
  "final_action_type": action.get("action_type", "unknown")
233
  }
234
 
 
 
 
 
 
235
 
236
  def main():
237
  print("AgentDebuggerEnv — Baseline Inference")
 
170
  reset_resp.raise_for_status()
171
  obs = reset_resp.json()
172
 
173
+ # [START] task=NAME
174
+ print(f"[START] task={task_id}", flush=True)
175
+
176
  messages = [
177
  {"role": "system", "content": SYSTEM_PROMPT},
178
  {"role": "user", "content": build_initial_message(obs)}
 
214
  info = result["info"]
215
  last_result = result
216
 
217
+ # [STEP] step=N reward=R
218
+ print(f"[STEP] step={obs['step_number']} reward={reward['step_reward']}", flush=True)
219
+
220
  # Build context for next LLM call
221
  step_msg = build_step_message(obs, reward, info)
222
  messages.append({"role": "assistant", "content": raw})
 
226
  break
227
 
228
  final_obs = last_result["observation"]
229
+ result = {
230
  "task_id": task_id,
231
  "grader_score": last_result["reward"]["grader_score"],
232
  "cumulative_reward": last_result["reward"]["cumulative_reward"],
 
238
  "final_action_type": action.get("action_type", "unknown")
239
  }
240
 
241
+ # [END] task=NAME score=S steps=N
242
+ print(f"[END] task={task_id} score={result['grader_score']} steps={result['steps_taken']}", flush=True)
243
+
244
+ return result
245
+
246
 
247
  def main():
248
  print("AgentDebuggerEnv — Baseline Inference")