# shank
# Update: Triggering the full run
# 75cd77b
"""
AgentDebuggerEnv — Training Monitor
Gradio UI that boots GRPO training in a background process and streams live status.
"""
import json
import os
import re
import subprocess
import sys
import threading
import time

import gradio as gr
# ── Start training in background ───────────────────────────────────────────────
# Shared module state. `_stream_training` writes these from a background
# thread; `check_status` reads them when rendering the status report.

# Most recent output lines captured from the training subprocess.
training_log: list[str] = []
# Handle of the training subprocess; stays None until it has been launched.
training_proc: subprocess.Popen | None = None
# Wall-clock timestamp taken at import time; basis for the "Elapsed" display.
training_started_at: float = time.time()
def _stream_training():
    """Launch the training script and mirror its output into ``training_log``.

    The script is resolved as ``training/train_grpo.py`` relative to this
    file and is executed with the current interpreter. stderr is merged into
    stdout, and every output line (trailing whitespace stripped) is appended
    to the module-level ``training_log``, which is trimmed to the most recent
    300 lines. The function blocks until the child's output ends and the
    process has exited, so it is meant to be run on a background thread.

    Side effects:
        Sets the module-level ``training_proc`` once the process has been
        started. If the process cannot be started, the error is recorded in
        ``training_log`` and ``training_proc`` is left as ``None``.
    """
    global training_proc
    script = os.path.join(os.path.dirname(__file__), "training", "train_grpo.py")
    try:
        proc = subprocess.Popen(
            [sys.executable, script],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            # Undecodable bytes must not raise in this reader: if the loop
            # died, nobody would drain the pipe and the child could block.
            errors="replace",
            bufsize=1,
        )
    except OSError as exc:
        # Surface launch failures in the UI instead of dying silently.
        training_log.append(f"Failed to launch {script}: {exc}")
        return
    training_proc = proc

    # The context manager closes the pipe once the child stops writing.
    with proc.stdout:
        for raw_line in proc.stdout:
            training_log.append(raw_line.rstrip())
            # Keep only the newest 300 lines (no-op while the log is shorter).
            del training_log[:-300]
    proc.wait()
# Start the training run as soon as this module is loaded. The streamer runs
# on a daemon thread, so it does not keep the interpreter alive at shutdown.
training_thread = threading.Thread(target=_stream_training, daemon=True)
training_thread.start()
# ── Status checker ─────────────────────────────────────────────────────────────
def _checkpoint_order(name: str) -> list[tuple[int, int, str]]:
    """Return a sort key that compares the numeric runs in *name* by value."""
    # re.split with a capturing group yields alternating text / digit tokens.
    return [
        (0, int(token), "") if token.isdecimal() else (1, 0, token)
        for token in re.split(r"(\d+)", name)
        if token
    ]


def check_status() -> str:
    """Build the plain-text status report shown in the UI.

    The report contains, in order:

    * time elapsed since ``training_started_at``;
    * the state of ``training_proc`` (not started / running / exit code);
    * baseline metrics from ``baseline_results.json`` if that file exists
      (keys ``solve_rate`` and ``avg_reward``);
    * the latest and total number of sub-directories of ``checkpoints``;
    * a note if ``final_model`` exists;
    * the last 40 lines of ``training_log``.

    All paths are relative to the current working directory.

    Returns:
        The report as a single newline-joined string.
    """
    report: list[str] = []
    elapsed = int(time.time() - training_started_at)
    report.append(f"Elapsed: {elapsed // 60}m {elapsed % 60}s")

    # Snapshot the handle and poll once so the branch taken and the exit code
    # reported come from the same observation.
    proc = training_proc
    if proc is None:
        report.append("Status: starting up (give it ~2 minutes)...")
    else:
        code = proc.poll()
        if code is None:
            report.append("Status: TRAINING RUNNING ✓")
        elif code == 0:
            report.append("Status: COMPLETED ✓")
        else:
            report.append(f"Status: EXITED (code {code})")

    # Baseline metrics are best-effort: a missing file is normal, a broken one
    # is reported rather than silently ignored.
    try:
        with open("baseline_results.json", encoding="utf-8") as f:
            baseline = json.load(f)
        baseline_lines = [
            f"\nBaseline solve rate : {baseline['solve_rate']:.1%}",
            f"Baseline avg reward : {baseline['avg_reward']:.3f}",
        ]
    except FileNotFoundError:
        pass
    except (OSError, ValueError, KeyError, TypeError) as exc:
        report.append(f"\nBaseline results unreadable: {exc!r}")
    else:
        report.extend(baseline_lines)

    if os.path.isdir("checkpoints"):
        try:
            entries = os.listdir("checkpoints")
        except OSError:
            # Directory vanished or is unreadable; treat as no checkpoints.
            entries = []
        # Order numerically so e.g. "checkpoint-1000" ranks after
        # "checkpoint-200"; a plain string sort would put it first.
        ckpts = sorted(
            (d for d in entries if os.path.isdir(os.path.join("checkpoints", d))),
            key=_checkpoint_order,
        )
        if ckpts:
            report.append(f"\nLatest checkpoint : {ckpts[-1]}")
            report.append(f"Total checkpoints : {len(ckpts)}")

    if os.path.exists("final_model"):
        report.append("\nFinal model saved ✓ — training complete!")

    report.append("\n" + "─" * 50)
    report.append("Recent log (last 40 lines):")
    report.extend(training_log[-40:] or ["(no output yet)"])
    return "\n".join(report)
# ── Gradio UI ──────────────────────────────────────────────────────────────────
# Build the monitor page: a static header, a read-only status box, a manual
# refresh button, and a timer that refreshes the box periodically.
with gr.Blocks(title="AgentDebuggerEnv Training Monitor") as demo:
    # The header is assembled from explicit lines so its content does not
    # depend on how this code is indented.
    gr.Markdown(
        "\n"
        "# AgentDebuggerEnv — GRPO Training Monitor\n"
        "Training **Qwen2.5-Coder-7B-Instruct** on structured hypothesis-driven debugging.\n"
        "- Algorithm: GRPO (same as DeepSeek-R1)\n"
        "- Dataset: 90 hand-validated bugs across 3 difficulty tiers\n"
        "- Curriculum: Tier 1 (steps 0–150) → Tier 1+2 (150–350) → All tiers (350–500)\n"
    )
    status_box = gr.Textbox(
        label="Training Status",
        lines=50,
        max_lines=50,
        interactive=False,
    )
    refresh_btn = gr.Button("🔄 Refresh Status")
    refresh_btn.click(fn=check_status, outputs=status_box)
    # Load initial status on page load
    demo.load(fn=check_status, outputs=status_box)
    # Auto-refresh timer (Gradio 4.x syntax); ticks every 30 seconds.
    timer = gr.Timer(value=30)
    timer.tick(fn=check_status, outputs=status_box)

# Listen on all interfaces, port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)