shank committed on
Commit ·
75cd77b
1
Parent(s): 2b499e7
Update: Triggering the full run
Browse files
app.py
CHANGED
|
@@ -21,7 +21,7 @@ def _stream_training():
|
|
| 21 |
global training_proc
|
| 22 |
script = os.path.join(os.path.dirname(__file__), "training", "train_grpo.py")
|
| 23 |
training_proc = subprocess.Popen(
|
| 24 |
-
[sys.executable, script
|
| 25 |
stdout=subprocess.PIPE,
|
| 26 |
stderr=subprocess.STDOUT,
|
| 27 |
text=True,
|
|
@@ -88,7 +88,7 @@ with gr.Blocks(title="AgentDebuggerEnv Training Monitor") as demo:
|
|
| 88 |
Training **Qwen2.5-Coder-7B-Instruct** on structured hypothesis-driven debugging.
|
| 89 |
- Algorithm: GRPO (same as DeepSeek-R1)
|
| 90 |
- Dataset: 90 hand-validated bugs across 3 difficulty tiers
|
| 91 |
-
- Curriculum: Tier 1 (steps 0–
|
| 92 |
"""
|
| 93 |
)
|
| 94 |
status_box = gr.Textbox(
|
|
|
|
| 21 |
global training_proc
|
| 22 |
script = os.path.join(os.path.dirname(__file__), "training", "train_grpo.py")
|
| 23 |
training_proc = subprocess.Popen(
|
| 24 |
+
[sys.executable, script],
|
| 25 |
stdout=subprocess.PIPE,
|
| 26 |
stderr=subprocess.STDOUT,
|
| 27 |
text=True,
|
|
|
|
| 88 |
Training **Qwen2.5-Coder-7B-Instruct** on structured hypothesis-driven debugging.
|
| 89 |
- Algorithm: GRPO (same as DeepSeek-R1)
|
| 90 |
- Dataset: 90 hand-validated bugs across 3 difficulty tiers
|
| 91 |
+
- Curriculum: Tier 1 (steps 0–150) → Tier 1+2 (150–350) → All tiers (350–500)
|
| 92 |
"""
|
| 93 |
)
|
| 94 |
status_box = gr.Textbox(
|