"""
AgentDebuggerEnv — Training Monitor
Gradio UI that boots GRPO training in a background process and streams live status.
"""
import subprocess
import threading
import gradio as gr
import os
import json
import sys
import time
# ── Start training in background ──────────────────────────────────────────────
# Shared state between the background reader (_stream_training) and the
# status renderer (check_status).

# Most recent output lines of the training process; the reader appends to it
# and trims it to 300 entries, check_status shows the last 40.
training_log: list[str] = []
# Handle of the training subprocess; stays None until the reader has spawned it.
training_proc: subprocess.Popen | None = None
# Wall-clock time at which this module was loaded; basis for the "Elapsed" line.
training_started_at: float = time.time()
def _stream_training():
    """Run the GRPO training script as a child process and capture its output.

    Spawns ``training/train_grpo.py`` (resolved relative to this file) with the
    current interpreter, publishes the process handle in the module-level
    ``training_proc`` and appends every output line (stderr merged into
    stdout) to ``training_log``, keeping only the newest 300 lines. Returns
    once the child has exited and its pipe has been drained.

    If the process cannot be launched at all, the error is recorded in
    ``training_log`` and ``training_proc`` is left as ``None``.
    """
    global training_proc
    script = os.path.join(os.path.dirname(__file__), "training", "train_grpo.py")
    try:
        proc = subprocess.Popen(
            # -u: a Python child block-buffers stdout when it is a pipe, so
            # without it the "live" log would only arrive in large, late bursts.
            [sys.executable, "-u", script],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            # Decode explicitly and leniently: with the locale default a single
            # undecodable byte would raise here, kill this reader thread and
            # leave the child blocked on a full pipe.
            encoding="utf-8",
            errors="replace",
            bufsize=1,
        )
    except OSError as exc:
        # Surface the failure in the UI instead of dying silently in a thread.
        training_log.append(f"Failed to launch training script: {exc}")
        return
    training_proc = proc

    # The context manager closes the pipe once the child has closed its end.
    with proc.stdout:
        for line in proc.stdout:
            training_log.append(line.rstrip())
            if len(training_log) > 300:
                training_log.pop(0)
    proc.wait()
# Kick off training as soon as this module is executed. The reader runs on a
# daemon thread, so it never keeps the interpreter alive on its own.
training_thread = threading.Thread(target=_stream_training, daemon=True)
training_thread.start()
# ── Status checker ────────────────────────────────────────────────────────────
def _checkpoint_order(name: str) -> tuple[int, str]:
    """Return a sort key ordering checkpoint directories by trailing step number.

    ``checkpoint-100`` yields ``(100, "checkpoint-100")`` and therefore sorts
    after ``checkpoint-50``; plain string sorting would place it before.
    Names without a numeric ``-<digits>`` suffix get step ``-1`` and sort ahead
    of every numbered checkpoint, ordered by name among themselves.
    """
    suffix = name.rpartition("-")[2]
    step = int(suffix) if suffix.isascii() and suffix.isdigit() else -1
    return step, name


def check_status() -> str:
    """Build the plain-text status report shown in the monitor UI.

    The report contains, in order: elapsed time since module load, the state
    of the training process, baseline metrics from ``baseline_results.json``,
    the newest entry and count of directories under ``checkpoints``, a note
    when ``final_model`` exists, and the last 40 captured log lines. All paths
    are relative to the current working directory.

    Returns:
        The report as a single newline-joined string. Missing or unreadable
        artefacts never raise; they are skipped or replaced by a short note.
    """
    # NOTE(review): the "β" glyphs in the strings below look like mis-decoded
    # symbols (check marks, dashes, rules) — confirm against the original file.
    lines: list[str] = []

    elapsed = int(time.time() - training_started_at)
    lines.append(f"Elapsed: {elapsed // 60}m {elapsed % 60}s")

    # Snapshot the global once: the reader thread assigns it concurrently.
    proc = training_proc
    code = None if proc is None else proc.poll()
    if proc is None:
        lines.append("Status: starting up (give it ~2 minutes)...")
    elif code is None:
        lines.append("Status: TRAINING RUNNING β")
    elif code == 0:
        lines.append("Status: COMPLETED β")
    else:
        lines.append(f"Status: EXITED (code {code})")

    # Format both baseline lines before appending so that a missing key can
    # never leave a half-written section in the report.
    try:
        with open("baseline_results.json", encoding="utf-8") as f:
            baseline = json.load(f)
        baseline_lines = [
            f"\nBaseline solve rate : {baseline['solve_rate']:.1%}",
            f"Baseline avg reward : {baseline['avg_reward']:.3f}",
        ]
    except FileNotFoundError:
        pass  # baseline evaluation has not produced its file yet
    except (OSError, ValueError, KeyError, TypeError) as exc:
        # Truncated JSON (file still being written), wrong schema, bad values.
        lines.append(f"\nBaseline results unreadable ({type(exc).__name__})")
    else:
        lines.extend(baseline_lines)

    try:
        ckpts = sorted(
            (
                entry
                for entry in os.listdir("checkpoints")
                if os.path.isdir(os.path.join("checkpoints", entry))
            ),
            key=_checkpoint_order,
        )
    except OSError:
        # Directory absent, not a directory, or removed while listing.
        ckpts = []
    if ckpts:
        lines.append(f"\nLatest checkpoint : {ckpts[-1]}")
        lines.append(f"Total checkpoints : {len(ckpts)}")

    if os.path.exists("final_model"):
        lines.append("\nFinal model saved β β training complete!")

    lines.append("\n" + "β" * 50)
    lines.append("Recent log (last 40 lines):")
    lines.extend(training_log[-40:] or ["(no output yet)"])
    return "\n".join(lines)
# ── Gradio UI ─────────────────────────────────────────────────────────────────
# Header text rendered above the status box; kept verbatim.
# NOTE(review): the "β" / "π" glyphs in this block's strings look like
# mis-decoded symbols (dashes, arrows, an emoji) — confirm against the original.
_HEADER_MARKDOWN = """
# AgentDebuggerEnv β GRPO Training Monitor
Training **Qwen2.5-Coder-7B-Instruct** on structured hypothesis-driven debugging.
- Algorithm: GRPO (same as DeepSeek-R1)
- Dataset: 90 hand-validated bugs across 3 difficulty tiers
- Curriculum: Tier 1 (steps 0β150) β Tier 1+2 (150β350) β All tiers (350β500)
"""

with gr.Blocks(title="AgentDebuggerEnv Training Monitor") as demo:
    gr.Markdown(_HEADER_MARKDOWN)
    status_box = gr.Textbox(
        label="Training Status", lines=50, max_lines=50, interactive=False
    )
    refresh_btn = gr.Button("π Refresh Status")

    # Every trigger below renders the same report into the same textbox.
    _status_update = {"fn": check_status, "outputs": status_box}
    # Manual refresh.
    refresh_btn.click(**_status_update)
    # Initial status on page load.
    demo.load(**_status_update)
    # Auto-refresh timer (Gradio 4.x syntax).
    timer = gr.Timer(value=30)
    timer.tick(**_status_update)

demo.launch(server_name="0.0.0.0", server_port=7860)