"""
AgentDebuggerEnv — Training Monitor
Gradio UI that boots GRPO training in a background process and streams live status.
"""

import subprocess
import threading
import gradio as gr
import os
import json
import sys
import time

# ── Start training in background ───────────────────────────────────────────────
training_log: list[str] = []
training_proc: subprocess.Popen | None = None
training_started_at: float = time.time()


def _stream_training():
    global training_proc
    script = os.path.join(os.path.dirname(__file__), "training", "train_grpo.py")
    training_proc = subprocess.Popen(
        [sys.executable, script],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
    )
    for line in training_proc.stdout:
        line = line.rstrip()
        training_log.append(line)
        if len(training_log) > 300:
            training_log.pop(0)
    training_proc.wait()


training_thread = threading.Thread(target=_stream_training, daemon=True)
training_thread.start()


# ── Status checker ─────────────────────────────────────────────────────────────
def check_status() -> str:
    lines: list[str] = []
    elapsed = int(time.time() - training_started_at)
    lines.append(f"Elapsed: {elapsed // 60}m {elapsed % 60}s")

    if training_proc is None:
        lines.append("Status: starting up (give it ~2 minutes)...")
    elif training_proc.poll() is None:
        lines.append("Status: TRAINING RUNNING ✓")
    else:
        code = training_proc.poll()
        lines.append(f"Status: {'COMPLETED ✓' if code == 0 else f'EXITED (code {code})'}")

    if os.path.exists("baseline_results.json"):
        try:
            with open("baseline_results.json") as f:
                baseline = json.load(f)
            lines.append(f"\nBaseline solve rate : {baseline['solve_rate']:.1%}")
            lines.append(f"Baseline avg reward : {baseline['avg_reward']:.3f}")
        except Exception:
            pass

    if os.path.exists("checkpoints"):
        ckpts = sorted(
            [d for d in os.listdir("checkpoints") if os.path.isdir(f"checkpoints/{d}")]
        )
        if ckpts:
            lines.append(f"\nLatest checkpoint   : {ckpts[-1]}")
            lines.append(f"Total checkpoints   : {len(ckpts)}")

    if os.path.exists("final_model"):
        lines.append("\nFinal model saved ✓ — training complete!")

    lines.append("\n" + "─" * 50)
    lines.append("Recent log (last 40 lines):")
    lines.extend(training_log[-40:] if training_log else ["(no output yet)"])

    return "\n".join(lines)


# ── Gradio UI ──────────────────────────────────────────────────────────────────
with gr.Blocks(title="AgentDebuggerEnv Training Monitor") as demo:
    gr.Markdown(
        """
# AgentDebuggerEnv — GRPO Training Monitor
Training **Qwen2.5-Coder-7B-Instruct** on structured hypothesis-driven debugging.
- Algorithm: GRPO (same as DeepSeek-R1)
- Dataset: 90 hand-validated bugs across 3 difficulty tiers
- Curriculum: Tier 1 (steps 0–150) → Tier 1+2 (150–350) → All tiers (350–500)
        """
    )
    status_box = gr.Textbox(
        label="Training Status",
        lines=50,
        max_lines=50,
        interactive=False,
    )
    refresh_btn = gr.Button("🔄 Refresh Status")
    refresh_btn.click(fn=check_status, outputs=status_box)

    # Load initial status on page load
    demo.load(fn=check_status, outputs=status_box)

    # Auto-refresh timer (Gradio 4.x syntax)
    timer = gr.Timer(value=30)
    timer.tick(fn=check_status, outputs=status_box)

demo.launch(server_name="0.0.0.0", server_port=7860)