File size: 3,983 Bytes
b92ad01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75cd77b
b92ad01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75cd77b
b92ad01
 
 
 
 
 
 
 
5eea2dd
b92ad01
 
5eea2dd
 
 
 
 
 
b92ad01
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
"""
AgentDebuggerEnv — Training Monitor
Gradio UI that boots GRPO training in a background process and streams live status.
"""

import json
import os
import re
import subprocess
import sys
import threading
import time

import gradio as gr

# ── Start training in background ───────────────────────────────────────────────
# Module-level state shared between the background streaming thread, which
# writes it, and the status callback, which reads it.
# Rolling buffer of output lines from the training subprocess; the streaming
# function trims it to the most recent 300 entries.
training_log: list[str] = []
# Handle of the training subprocess; None until the streaming function has
# created the process.
training_proc: subprocess.Popen | None = None
# Wall-clock timestamp taken when this module is loaded; origin of the
# "Elapsed" readout.
training_started_at: float = time.time()


def _stream_training():
    """Run the training script as a subprocess and mirror its output.

    Intended to run on a background thread. Launches
    ``training/train_grpo.py`` (resolved relative to this file) with the
    current interpreter, publishes the process handle through the
    module-level ``training_proc``, and appends every output line (stdout
    and stderr merged) to ``training_log``, keeping only the most recent
    300 lines. Returns once the child has closed its output and exited.

    A failure to launch the process is recorded in ``training_log`` instead
    of propagating, so the cause shows up in the status view rather than
    only in a thread traceback on stderr.
    """
    global training_proc
    max_log_lines = 300
    script = os.path.join(os.path.dirname(__file__), "training", "train_grpo.py")

    # With stdout connected to a pipe the child interpreter block-buffers its
    # output, so lines would arrive in large delayed bursts. Force unbuffered
    # UTF-8 output so the log is live and decodes with the codec used below.
    child_env = {
        **os.environ,
        "PYTHONUNBUFFERED": "1",
        "PYTHONIOENCODING": "utf-8",
    }

    try:
        proc = subprocess.Popen(
            [sys.executable, script],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            encoding="utf-8",
            # An undecodable byte must not kill this reader: if the thread
            # died, nothing would drain the pipe any more.
            errors="replace",
            bufsize=1,
            env=child_env,
        )
    except OSError as exc:
        training_log.append(f"[monitor] failed to start training process: {exc}")
        return

    training_proc = proc
    # Leaving the context closes the pipe and waits for the child to exit.
    with proc:
        for raw_line in proc.stdout:
            training_log.append(raw_line.rstrip())
            if len(training_log) > max_log_lines:
                # Drop everything but the newest entries in one step.
                del training_log[:-max_log_lines]


# Kick off the training stream as soon as this module is loaded. The thread is
# a daemon thread, so it does not keep the interpreter alive at shutdown.
training_thread = threading.Thread(target=_stream_training, daemon=True)
training_thread.start()


# ── Status checker ─────────────────────────────────────────────────────────────
_DIGIT_RUNS = re.compile(r"(\d+)")


def _natural_key(name: str) -> list:
    """Return a sort key that orders embedded digit runs by numeric value."""
    # Splitting on a capturing group alternates text (even index) and digit
    # runs (odd index), so keys always compare str with str and int with int.
    return [
        int(part) if index % 2 else part
        for index, part in enumerate(_DIGIT_RUNS.split(name))
    ]


def _process_status_line() -> str:
    """Describe the state of the training subprocess in a single line."""
    # Read the global once: the streaming thread assigns it concurrently.
    proc = training_proc
    if proc is None:
        # The launcher thread has finished without ever creating a process,
        # so waiting longer will not help.
        if not training_thread.is_alive():
            return "Status: FAILED TO START (training process could not be launched)"
        return "Status: starting up (give it ~2 minutes)..."
    code = proc.poll()
    if code is None:
        return "Status: TRAINING RUNNING ✓"
    if code == 0:
        return "Status: COMPLETED ✓"
    return f"Status: EXITED (code {code})"


def _baseline_lines() -> list[str]:
    """Return the baseline summary lines, or a short note if the file is unusable."""
    try:
        with open("baseline_results.json", encoding="utf-8") as f:
            baseline = json.load(f)
        # Both lines are formatted before anything is returned, so a bad
        # second field cannot leave half a summary in the output.
        return [
            f"\nBaseline solve rate : {baseline['solve_rate']:.1%}",
            f"Baseline avg reward : {baseline['avg_reward']:.3f}",
        ]
    except FileNotFoundError:
        return []
    except (OSError, ValueError, KeyError, TypeError) as exc:
        # Best-effort section: report the problem instead of hiding it, but
        # never let it break the status view.
        return [f"\nBaseline results unreadable ({type(exc).__name__})"]


def _checkpoint_lines() -> list[str]:
    """Return the latest-checkpoint summary lines for the ``checkpoints`` directory."""
    try:
        names = [
            entry
            for entry in os.listdir("checkpoints")
            if os.path.isdir(os.path.join("checkpoints", entry))
        ]
    except OSError:
        # Missing, not a directory, or removed while being listed.
        return []
    if not names:
        return []
    # Plain string sorting would rank "checkpoint-50" after "checkpoint-100";
    # compare digit runs numerically so the highest number is reported as
    # latest. The name itself breaks ties such as "a1" versus "a01".
    names.sort(key=lambda n: (_natural_key(n), n))
    return [
        f"\nLatest checkpoint   : {names[-1]}",
        f"Total checkpoints   : {len(names)}",
    ]


def check_status() -> str:
    """Build the multi-line status text shown in the monitor.

    The report contains, in order: elapsed time since module load, the
    training process state, the baseline metrics from
    ``baseline_results.json`` (when present), the newest directory under
    ``checkpoints`` and the directory count (when any exist), a completion
    note when ``final_model`` exists, and the last 40 captured log lines.
    All paths are relative to the current working directory.

    Returns:
        The report as a single newline-joined string.
    """
    elapsed = int(time.time() - training_started_at)
    lines: list[str] = [
        f"Elapsed: {elapsed // 60}m {elapsed % 60}s",
        _process_status_line(),
    ]
    lines += _baseline_lines()
    lines += _checkpoint_lines()

    if os.path.exists("final_model"):
        lines.append("\nFinal model saved ✓ — training complete!")

    lines.append("\n" + "─" * 50)
    lines.append("Recent log (last 40 lines):")
    lines.extend(training_log[-40:] or ["(no output yet)"])

    return "\n".join(lines)


# ── Gradio UI ──────────────────────────────────────────────────────────────────
# Page layout: a static header, one read-only status box, a manual refresh
# button, and a timer; all three triggers call check_status.
with gr.Blocks(title="AgentDebuggerEnv Training Monitor") as demo:
    gr.Markdown(
        """
# AgentDebuggerEnv — GRPO Training Monitor
Training **Qwen2.5-Coder-7B-Instruct** on structured hypothesis-driven debugging.
- Algorithm: GRPO (same as DeepSeek-R1)
- Dataset: 90 hand-validated bugs across 3 difficulty tiers
- Curriculum: Tier 1 (steps 0–150) → Tier 1+2 (150–350) → All tiers (350–500)
        """
    )
    status_box = gr.Textbox(
        label="Training Status",
        lines=50,
        max_lines=50,
        interactive=False,
    )
    refresh_btn = gr.Button("🔄 Refresh Status")
    refresh_btn.click(fn=check_status, outputs=status_box)

    # Load initial status on page load
    demo.load(fn=check_status, outputs=status_box)

    # Auto-refresh timer (Gradio 4.x syntax)
    timer = gr.Timer(value=30)
    timer.tick(fn=check_status, outputs=status_box)

# Listen on all network interfaces, port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)