Add parsed training metrics and plots
Browse files
- .gitattributes +2 -0
- training_logs/20260429_184357_metrics_a2_rl_unitsyn_python_v3_402161.csv +13 -0
- training_logs/20260429_184357_metrics_a2_rl_unitsyn_python_v3_402162.csv +3 -0
- training_logs/20260429_184357_metrics_report.md +265 -0
- training_logs/20260429_184357_metrics_table.csv +15 -0
- training_logs/20260429_184357_reward_vs_steps.png +3 -0
- training_logs/20260429_184357_trial_results.csv +0 -0
- training_logs/20260429_184357_turn_count_distribution.png +3 -0
- training_logs/20260429_184357_vllm_metrics_a2_rl_unitsyn_python_v3_402161.csv +0 -0
- training_logs/20260429_184357_vllm_metrics_a2_rl_unitsyn_python_v3_402162.csv +0 -0
- training_logs/20260429_184357_vllm_metrics_table.csv +0 -0
.gitattributes
CHANGED
|
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
training_logs/20260429_184357_reward_vs_steps.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
training_logs/20260429_184357_turn_count_distribution.png filter=lfs diff=lfs merge=lfs -text
|
training_logs/20260429_184357_metrics_a2_rl_unitsyn_python_v3_402161.csv
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_DaytonaAuthenticationError,batch_errors/total_DaytonaAuthenticationError,batch_errors/avg_AgentEnvironmentTimeoutError,batch_errors/total_AgentEnvironmentTimeoutError,batch_errors/avg_VerifierRuntimeError,batch_errors/total_VerifierRuntimeError,batch_errors/avg_VerifierTimeoutError,batch_errors/total_VerifierTimeoutError,timing/save_hf_model,batch_errors/avg_DaytonaError,batch_errors/total_DaytonaError
|
| 2 |
+
0.0,0,64,512,0,0.0,0,0.0,5121.6914,4576.6295,5769.2436,13905,1499,2503.5505,0.543,0.0021,0.1803,0.0,0.219,0.0,0.0,1.0,0.0,0.0323,0.6719,0.543,11.793,75.1959,379.3328,55.8,857.9687,478.6359,10.9023,0.1146,2.5571,72.0045,656.5065,728.9238,44.3353,6093.5913,52.8301,656.8044,5309.2801,0,1,128,1024,994,14,18,0.0859375,11.0,0.140625,18.0,0.0078125,1.0,,,,,
|
| 3 |
+
0.0,0,64,512,1,1.0,1,1.0,5871.2051,5096.5576,6791.5128,15067,1,3402.3282,0.543,-0.0001,0.1554,-0.0002,0.2129,-0.0057,0.0,1.0,0.0,0.0274,0.6875,0.543,12.9813,75.3753,375.7304,56.2,857.9687,482.2383,0.0121,0.0559,2.7428,72.8696,707.4558,780.6952,29.1329,2730.7674,52.7333,707.7694,1894.5939,0,2,64,512,464,19,44,0.046875,3.0,0.6875,44.0,,,0.015625,1.0,,,
|
| 4 |
+
0.0,0,64,512,2,2.0,2,1.0,5706.7969,4967.944,6318.9893,14787,1,3398.0642,0.4531,-0.0022,0.1487,0.0,0.198,0.0,0.0,1.0,0.0,0.0284,0.625,0.4531,13.4866,75.6917,379.7476,55.7,857.9687,478.2211,0.3416,0.0392,2.6589,73.1319,687.2284,760.7086,29.5563,2618.6945,52.8218,687.5371,1802.5023,0,3,64,512,491,9,20,0.015625,1.0,0.3125,20.0,,,,,,,
|
| 5 |
+
0.0,0,64,512,3,3.0,3,1.0,5825.2578,4940.4704,6689.5483,14273,1,3209.1042,0.4941,-0.0017,0.0659,0.0,0.2102,0.0,0.0,1.0,0.0,0.0177,0.5312,0.4941,13.7198,75.7789,383.0292,55.4,857.9687,474.9395,0.3775,0.038,2.6718,77.6872,669.8871,747.9511,28.8753,2255.3097,53.5368,670.2257,1451.1474,0,4,64,512,456,20,40,0.09375,6.0,0.625,40.0,0.03125,2.0,0.125,8.0,,,
|
| 6 |
+
0.0,0,64,512,4,3.75,3,1.0,5520.8398,5594.9188,5458.4856,16082,1,3484.2247,0.457,0.0055,0.1648,-0.0002,0.2057,-0.0064,0.0,1.0,0.0,0.0214,0.6094,0.457,14.6653,76.0574,385.3959,55.1,857.9687,472.5728,0.3196,0.044,2.9597,73.5587,681.9984,755.9482,29.4618,3542.6439,52.7498,682.3452,2730.9604,0,5,64,512,464,14,42,0.09375,6.0,0.65625,42.0,,,,,37.5898,,
|
| 7 |
+
0.0,0,64,512,5,4.125,3,1.0,5461.6738,5218.1189,5769.8894,14552,1,3328.2592,0.5586,0.014,0.1023,-0.0002,0.1969,-0.0076,0.0,1.0,0.0,0.0179,0.7031,0.5586,15.0607,76.0109,385.4834,55.1,857.9687,472.4853,0.366,0.0393,2.7127,68.8947,678.4589,747.7171,29.9495,2213.735,54.8743,678.7827,1408.4036,0,6,64,512,474,15,28,0.125,8.0,0.4375,28.0,,,0.03125,2.0,,,
|
| 8 |
+
0.0,0,64,512,6,4.6562,4,1.0,7798.1641,7124.3027,8179.4006,20083,1,3802.9833,0.3613,0.0039,0.1358,-0.0001,0.2261,-0.004,0.0,1.0,0.0,0.0185,0.5312,0.3613,18.1326,76.0253,380.9768,55.6,857.9687,476.9919,0.6987,0.0516,3.5227,93.4902,731.6962,825.5737,29.3423,4983.416,13.7041,732.0316,4140.5883,0,7,64,512,512,0,0,,,,,,,,,,,
|
| 9 |
+
0.0,0,64,512,0,0.0,0,0.0,4865.5312,4540.8412,5507.3605,14128,1386,2616.8319,0.6641,-0.0047,0.0958,-0.0,0.206,-0.0,0.0,1.0,0.0,0.0163,0.7188,0.6641,18.3456,76.1459,384.4786,55.2,857.9687,473.4901,0.3173,0.0353,2.9251,73.3734,646.7222,720.5432,29.7617,5669.0399,52.8866,647.1342,4892.6495,1,8,64,512,470,16,39,0.046875,3.0,0.609375,39.0,,,,,,,
|
| 10 |
+
0.0,0,64,512,1,1.0,1,1.0,5855.5039,5276.4774,6594.0844,13894,1,3271.3689,0.5605,0.0007,0.0512,-0.0,0.2096,-0.0006,0.0,1.0,0.0,0.0219,0.6406,0.5605,18.6552,76.4328,386.8148,54.9,857.9687,471.1539,0.3384,0.0375,2.7372,78.6677,660.2909,739.3591,30.4714,3239.5269,52.9167,660.6535,2444.512,1,9,64,512,432,25,70,0.125,8.0,1.078125,69.0,,,0.03125,2.0,,0.015625,1.0
|
| 11 |
+
0.0,0,64,512,2,2.0,2,1.0,5375.2695,5344.5093,5397.7162,14823,1,3872.2484,0.4219,0.0135,0.113,-0.0002,0.1788,-0.007,0.0,1.0,0.0,0.0154,0.5469,0.4219,18.8019,76.4187,385.8892,55.0,857.9687,472.0795,0.3767,0.0413,2.7467,73.8308,700.3324,774.57,29.1637,3338.1958,53.0117,700.6976,2507.8644,1,10,64,512,463,23,37,0.140625,9.0,0.578125,37.0,,,0.046875,3.0,36.7892,,
|
| 12 |
+
0.0,0,64,512,3,3.0,3,1.0,5192.8828,4679.4904,5726.7291,15138,1,3311.2403,0.5098,0.0084,0.0758,-0.0002,0.1873,-0.0056,0.0,1.0,0.0,0.0164,0.5625,0.5098,18.9517,76.4518,383.7386,55.3,857.9687,474.23,0.3045,0.0384,2.7622,74.4343,676.0857,750.8928,28.7197,2321.6045,57.2462,676.4197,1510.6996,1,11,64,512,437,26,61,0.140625,9.0,0.953125,61.0,,,0.078125,5.0,,,
|
| 13 |
+
0.0,0,64,512,4,3.7969,3,1.0,5528.9668,5645.4653,5422.0674,16073,1,3727.4604,0.4785,0.0021,0.1344,-0.0001,0.1796,-0.0038,0.0,1.0,0.0,0.0198,0.6094,0.4785,19.0433,76.3939,383.2556,55.3,857.9687,474.7131,0.3795,0.0401,3.2865,78.4764,697.4553,776.3111,28.7288,2932.4204,52.7552,697.7943,2100.0414,1,12,16,128,110,8,15,0.0625,1.0,0.9375,15.0,,,0.125,2.0,,,
|
training_logs/20260429_184357_metrics_a2_rl_unitsyn_python_v3_402162.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_DaytonaAuthenticationError,batch_errors/total_DaytonaAuthenticationError,batch_errors/avg_AgentEnvironmentTimeoutError,batch_errors/total_AgentEnvironmentTimeoutError,batch_errors/avg_VerifierTimeoutError,batch_errors/total_VerifierTimeoutError
|
| 2 |
+
0.0,0,64,512,5,1.1406,0,0.2344,5434.5586,5246.1829,5770.3587,13053,1,3046.1974,0.6406,-0.003,0.059,0.0,0.189,0.0,0.0,1.0,0.0,0.0114,0.7308,0.6406,11.8048,75.1254,372.3459,56.6,857.9687,485.6227,12.7402,0.0478,2.4902,73.844,683.1484,757.335,46.1195,8330.7966,51.8296,683.4422,7519.1416,1,13,65,520,426,31,55,0.5846153846153846,38,0.8461538461538461,55.0,0.015384615384615385,1.0
|
| 3 |
+
0.0,0,64,512,1,1.0,1,1.0,6345.9297,6347.6109,6344.6529,18003,1,4261.8702,0.4316,0.0217,0.1127,-0.0005,0.1793,-0.0156,0.0,1.0,0.0,0.0191,0.5625,0.4316,15.5447,75.3049,399.4698,53.4,857.9687,458.4989,0.3343,0.0742,3.1141,89.1038,721.9162,811.4036,33.5523,871.8394,57.2936,722.2251,0.0049,1,14,11,88,84,4,0,0.36363636363636365,4,,,,
|
training_logs/20260429_184357_metrics_report.md
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SkyRL Training Metrics Analysis
|
| 2 |
+
|
| 3 |
+
Generated from 2 log files
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
| Log File | Last Global Step | Metric Blocks | Final Reward (mean) | Final Reward (max) | Total Time (s) |
|
| 8 |
+
|----------|-------------|---------------|---------------------|-------------------|----------------|
|
| 9 |
+
| a2_rl_unitsyn_python_v3_402161 | 12 | 12 | 0.5037 | 0.6641 | 41938.9 |
|
| 10 |
+
| a2_rl_unitsyn_python_v3_402162 | 14 | 2 | 0.5361 | 0.6406 | 9202.6 |
|
| 11 |
+
|
| 12 |
+
## Async Metrics
|
| 13 |
+
|
| 14 |
+
| | Mean | Std | Min | Max | Count |
|
| 15 |
+
|:------------------------------|-----------:|---------:|------:|---------:|--------:|
|
| 16 |
+
| async/discard_rate | 0 | 0 | 0 | 0 | 14 |
|
| 17 |
+
| async/discarded_count | 0 | 0 | 0 | 0 | 14 |
|
| 18 |
+
| async/effective_batch_groups | 64 | 0 | 64 | 64 | 14 |
|
| 19 |
+
| async/effective_batch_samples | 512 | 0 | 512 | 512 | 14 |
|
| 20 |
+
| async/staleness_max | 2.64286 | 1.94569 | 0 | 6 | 14 |
|
| 21 |
+
| async/staleness_mean | 2.17634 | 1.54834 | 0 | 4.6562 | 14 |
|
| 22 |
+
| async/staleness_min | 1.85714 | 1.35062 | 0 | 4 | 14 |
|
| 23 |
+
| async/staleness_ratio | 0.802457 | 0.396118 | 0 | 1 | 14 |
|
| 24 |
+
|
| 25 |
+
## Generate Metrics
|
| 26 |
+
|
| 27 |
+
| | Mean | Std | Min | Max | Count |
|
| 28 |
+
|:-------------------------------------|----------:|---------:|---------:|---------:|--------:|
|
| 29 |
+
| generate/avg_num_tokens | 5707.45 | 705.472 | 4865.53 | 7798.16 | 14 |
|
| 30 |
+
| generate/avg_tokens_non_zero_rewards | 5328.54 | 701.545 | 4540.84 | 7124.3 | 14 |
|
| 31 |
+
| generate/avg_tokens_zero_rewards | 6124.29 | 768.378 | 5397.72 | 8179.4 | 14 |
|
| 32 |
+
| generate/max_num_tokens | 15275.8 | 1837.96 | 13053 | 20083 | 14 |
|
| 33 |
+
| generate/min_num_tokens | 206.929 | 523.93 | 1 | 1499 | 14 |
|
| 34 |
+
| generate/std_num_tokens | 3373.98 | 467.502 | 2503.55 | 4261.87 | 14 |
|
| 35 |
+
|
| 36 |
+
## Loss Metrics
|
| 37 |
+
|
| 38 |
+
| | Mean | Std | Min | Max | Count |
|
| 39 |
+
|:----------------------------|---------:|-----------:|--------:|-------:|--------:|
|
| 40 |
+
| loss/avg_final_rewards | 0.508364 | 0.083743 | 0.3613 | 0.6641 | 14 |
|
| 41 |
+
| loss/avg_raw_advantages | 0.0043 | 0.00761799 | -0.0047 | 0.0217 | 14 |
|
| 42 |
+
| loss/avg_raw_advantages_abs | 0.113936 | 0.041136 | 0.0512 | 0.1803 | 14 |
|
| 43 |
+
|
| 44 |
+
## Policy Metrics
|
| 45 |
+
|
| 46 |
+
| | Mean | Std | Min | Max | Count |
|
| 47 |
+
|:---------------------------|-------------:|------------:|--------:|-------:|--------:|
|
| 48 |
+
| policy/final_loss | -0.000121429 | 0.000142389 | -0.0005 | 0 | 14 |
|
| 49 |
+
| policy/policy_entropy | 0.199886 | 0.0153501 | 0.1788 | 0.2261 | 14 |
|
| 50 |
+
| policy/policy_loss | -0.00402143 | 0.00446631 | -0.0156 | 0 | 14 |
|
| 51 |
+
| policy/policy_lr | 0 | 0 | 0 | 0 | 14 |
|
| 52 |
+
| policy/policy_update_steps | 1 | 0 | 1 | 1 | 14 |
|
| 53 |
+
| policy/ppo_clip_ratio | 0 | 0 | 0 | 0 | 14 |
|
| 54 |
+
| policy/raw_grad_norm | 0.0202786 | 0.00565457 | 0.0114 | 0.0323 | 14 |
|
| 55 |
+
|
| 56 |
+
## Reward Metrics
|
| 57 |
+
|
| 58 |
+
| | Mean | Std | Min | Max | Count |
|
| 59 |
+
|:----------------------|---------:|----------:|-------:|-------:|--------:|
|
| 60 |
+
| reward/avg_pass_at_8 | 0.623629 | 0.0703892 | 0.5312 | 0.7308 | 14 |
|
| 61 |
+
| reward/avg_raw_reward | 0.508364 | 0.083743 | 0.3613 | 0.6641 | 14 |
|
| 62 |
+
|
| 63 |
+
## System Metrics
|
| 64 |
+
|
| 65 |
+
| | Mean | Std | Min | Max | Count |
|
| 66 |
+
|:------------------------|---------:|------------:|---------:|---------:|--------:|
|
| 67 |
+
| system/process_rss_gb | 15.7847 | 2.78836 | 11.793 | 19.0433 | 14 |
|
| 68 |
+
| system/process_vms_gb | 75.8863 | 0.480004 | 75.1254 | 76.4518 | 14 |
|
| 69 |
+
| system/ram_available_gb | 383.263 | 6.21639 | 372.346 | 399.47 | 14 |
|
| 70 |
+
| system/ram_percent | 55.3286 | 0.731099 | 53.4 | 56.6 | 14 |
|
| 71 |
+
| system/ram_total_gb | 857.969 | 2.35957e-13 | 857.969 | 857.969 | 14 |
|
| 72 |
+
| system/ram_used_gb | 474.705 | 6.21637 | 458.499 | 485.623 | 14 |
|
| 73 |
+
|
| 74 |
+
## Timing Metrics
|
| 75 |
+
|
| 76 |
+
| | Mean | Std | Min | Max | Count |
|
| 77 |
+
|:--------------------------------------|-----------:|-------------:|---------:|----------:|--------:|
|
| 78 |
+
| timing/cleanup_old_checkpoints | 1.98634 | 4.18445 | 0.0121 | 12.7402 | 14 |
|
| 79 |
+
| timing/compute_advantages_and_returns | 0.0498 | 0.0212895 | 0.0353 | 0.1146 | 14 |
|
| 80 |
+
| timing/convert_to_training_input | 2.84912 | 0.287621 | 2.4902 | 3.5227 | 14 |
|
| 81 |
+
| timing/fwd_logprobs_values_reward | 76.6691 | 6.77429 | 68.8947 | 93.4902 | 14 |
|
| 82 |
+
| timing/policy_train | 685.656 | 24.3175 | 646.722 | 731.696 | 14 |
|
| 83 |
+
| timing/run_training | 762.709 | 29.2176 | 720.543 | 825.574 | 14 |
|
| 84 |
+
| timing/save_checkpoints | 31.9408 | 5.76727 | 28.7197 | 46.1195 | 14 |
|
| 85 |
+
| timing/step | 3652.97 | 1959.73 | 871.839 | 8330.8 | 14 |
|
| 86 |
+
| timing/sync_weights | 50.7993 | 10.807 | 13.7041 | 57.2936 | 14 |
|
| 87 |
+
| timing/train_critic_and_policy | 685.99 | 24.3066 | 647.134 | 732.032 | 14 |
|
| 88 |
+
| timing/wait_for_generation_buffer | 2836.6 | 1971.02 | 0.0049 | 7519.14 | 14 |
|
| 89 |
+
| timing/save_hf_model | 37.1895 | 0.56611 | 36.7892 | 37.5898 | 2 |
|
| 90 |
+
|
| 91 |
+
## Trainer Metrics
|
| 92 |
+
|
| 93 |
+
| | Mean | Std | Min | Max | Count |
|
| 94 |
+
|:--------------------|-------:|---------:|------:|------:|--------:|
|
| 95 |
+
| trainer/epoch | 0.5 | 0.518875 | 0 | 1 | 14 |
|
| 96 |
+
| trainer/global_step | 7.5 | 4.1833 | 1 | 14 | 14 |
|
| 97 |
+
|
| 98 |
+
## Batch Errors Metrics
|
| 99 |
+
|
| 100 |
+
| | Mean | Std | Min | Max | Count |
|
| 101 |
+
|:------------------------------------------------|------------:|------------:|-----------:|------------:|--------:|
|
| 102 |
+
| batch_errors/total_batches | 61.4286 | 26.4828 | 11 | 128 | 14 |
|
| 103 |
+
| batch_errors/total_instances | 491.429 | 211.863 | 88 | 1024 | 14 |
|
| 104 |
+
| batch_errors/total_successful | 448.357 | 206.364 | 84 | 994 | 14 |
|
| 105 |
+
| batch_errors/total_failed | 16 | 8.77935 | 0 | 31 | 14 |
|
| 106 |
+
| batch_errors/total_masked | 33.5 | 21.2268 | 0 | 70 | 14 |
|
| 107 |
+
| batch_errors/avg_DaytonaAuthenticationError | 0.148063 | 0.156344 | 0.015625 | 0.584615 | 13 |
|
| 108 |
+
| batch_errors/total_DaytonaAuthenticationError | 8.23077 | 9.49696 | 1 | 38 | 13 |
|
| 109 |
+
| batch_errors/avg_AgentEnvironmentTimeoutError | 0.655148 | 0.273596 | 0.140625 | 1.07812 | 12 |
|
| 110 |
+
| batch_errors/total_AgentEnvironmentTimeoutError | 39 | 16.9973 | 15 | 69 | 12 |
|
| 111 |
+
| batch_errors/avg_VerifierRuntimeError | 0.0195312 | 0.0165728 | 0.0078125 | 0.03125 | 2 |
|
| 112 |
+
| batch_errors/total_VerifierRuntimeError | 1.5 | 0.707107 | 1 | 2 | 2 |
|
| 113 |
+
| batch_errors/avg_VerifierTimeoutError | 0.0585637 | 0.0455868 | 0.0153846 | 0.125 | 8 |
|
| 114 |
+
| batch_errors/total_VerifierTimeoutError | 3 | 2.39046 | 1 | 8 | 8 |
|
| 115 |
+
| batch_errors/avg_DaytonaError | 0.015625 | nan | 0.015625 | 0.015625 | 1 |
|
| 116 |
+
| batch_errors/total_DaytonaError | 1 | nan | 1 | 1 | 1 |
|
| 117 |
+
|
| 118 |
+
## Training Progression by Log
|
| 119 |
+
|
| 120 |
+
### a2_rl_unitsyn_python_v3_402161
|
| 121 |
+
|
| 122 |
+
| Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
|
| 123 |
+
|------|--------|--------|-----|------|---------------|-------------|
|
| 124 |
+
| 1 | 0.5430 | 0.6719 | 0.000000 | 0.0000 | 6093.6 | 5309.3 |
|
| 125 |
+
| 2 | 0.5430 | 0.6875 | 0.000000 | -0.0002 | 2730.8 | 1894.6 |
|
| 126 |
+
| 3 | 0.4531 | 0.6250 | 0.000000 | 0.0000 | 2618.7 | 1802.5 |
|
| 127 |
+
| 4 | 0.4941 | 0.5312 | 0.000000 | 0.0000 | 2255.3 | 1451.1 |
|
| 128 |
+
| 5 | 0.4570 | 0.6094 | 0.000000 | -0.0002 | 3542.6 | 2731.0 |
|
| 129 |
+
| 6 | 0.5586 | 0.7031 | 0.000000 | -0.0002 | 2213.7 | 1408.4 |
|
| 130 |
+
| 7 | 0.3613 | 0.5312 | 0.000000 | -0.0001 | 4983.4 | 4140.6 |
|
| 131 |
+
| 8 | 0.6641 | 0.7188 | 0.000000 | -0.0000 | 5669.0 | 4892.6 |
|
| 132 |
+
| 9 | 0.5605 | 0.6406 | 0.000000 | -0.0000 | 3239.5 | 2444.5 |
|
| 133 |
+
| 10 | 0.4219 | 0.5469 | 0.000000 | -0.0002 | 3338.2 | 2507.9 |
|
| 134 |
+
| 11 | 0.5098 | 0.5625 | 0.000000 | -0.0002 | 2321.6 | 1510.7 |
|
| 135 |
+
| 12 | 0.4785 | 0.6094 | 0.000000 | -0.0001 | 2932.4 | 2100.0 |
|
| 136 |
+
|
| 137 |
+
### a2_rl_unitsyn_python_v3_402162
|
| 138 |
+
|
| 139 |
+
| Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
|
| 140 |
+
|------|--------|--------|-----|------|---------------|-------------|
|
| 141 |
+
| 13 | 0.6406 | 0.7308 | 0.000000 | 0.0000 | 8330.8 | 7519.1 |
|
| 142 |
+
| 14 | 0.4316 | 0.5625 | 0.000000 | -0.0005 | 871.8 | 0.0 |
|
| 143 |
+
|
| 144 |
+
## Timing Analysis
|
| 145 |
+
|
| 146 |
+
### Average Time Breakdown (% of step time; components are nested, so rows overlap and do not sum to 100%)
|
| 147 |
+
|
| 148 |
+
| Component | Avg % of Step Time |
|
| 149 |
+
|-----------|-------------------|
|
| 150 |
+
| wait_for_generation_buffer | 69.8% |
|
| 151 |
+
| run_training | 28.2% |
|
| 152 |
+
| train_critic_and_policy | 25.3% |
|
| 153 |
+
| policy_train | 25.3% |
|
| 154 |
+
| fwd_logprobs_values_reward | 2.9% |
|
| 155 |
+
| sync_weights | 1.9% |
|
| 156 |
+
| save_checkpoints | 1.1% |
|
| 157 |
+
| save_hf_model | 1.1% |
|
| 158 |
+
| convert_to_training_input | 0.1% |
|
| 159 |
+
| cleanup_old_checkpoints | 0.0% |
|
| 160 |
+
| compute_advantages_and_returns | 0.0% |
|
| 161 |
+
|
| 162 |
+
## Cross-Log Comparison
|
| 163 |
+
|
| 164 |
+
| Log | Avg Reward | Pass@8 | Step Time (s) | Gen Wait Time (s) | Avg Tokens | Staleness |
|
| 165 |
+
|-----|------|------|------|------|------|------|
|
| 166 |
+
| a2_rl_unitsyn_python_v3_402161 | 0.5037 | 0.6198 | 3494.9121 | 2682.7702 | 5676.9819 | 2.3607 |
|
| 167 |
+
| a2_rl_unitsyn_python_v3_402162 | 0.5361 | 0.6466 | 4601.3180 | 3759.5732 | 5890.2442 | 1.0703 |
|
| 168 |
+
|
| 169 |
+
## vLLM Inference Engine Analysis
|
| 170 |
+
|
| 171 |
+
Metrics from vLLM stat loggers (V1LoggingStatLoggerFixed).
|
| 172 |
+
|
| 173 |
+
> **Note**: Ray deduplicates similar log messages with `[repeated Nx across cluster]`,
|
| 174 |
+
> so we typically capture stats from one engine per timestamp. The stats shown are
|
| 175 |
+
> **per-engine** values. Multiply by num_inference_engines for cluster-wide estimates.
|
| 176 |
+
|
| 177 |
+
### Summary by Log (Per-Engine Stats)
|
| 178 |
+
|
| 179 |
+
| Log | Avg Running/Engine | Avg Waiting/Engine | Avg Gen Throughput/Engine | Avg KV Cache % | Avg Prefix Hit % |
|
| 180 |
+
|-----|-------------------|-------------------|--------------------------|----------------|------------------|
|
| 181 |
+
| a2_rl_unitsyn_python_v3_402161 | 5.7 | 0.0 | 110.0 tok/s | 8.3% | 88.9% |
|
| 182 |
+
| a2_rl_unitsyn_python_v3_402162 | 5.5 | 0.0 | 111.2 tok/s | 8.6% | 86.6% |
|
| 183 |
+
|
| 184 |
+
### Utilization Analysis (Per-Engine)
|
| 185 |
+
|
| 186 |
+
Key indicators of inference engine utilization:
|
| 187 |
+
|
| 188 |
+
- **Running requests/engine**: Concurrent requests being processed by each engine
|
| 189 |
+
- **Waiting requests**: Requests queued (0 = engine not saturated, has spare capacity)
|
| 190 |
+
- **Generation throughput**: Decode tokens/sec per engine
|
| 191 |
+
  - 8B model on H100 can do **1000+ tok/s** when saturated
|
| 192 |
+
  - If seeing <300 tok/s with 0 waiting, engine is **starved for requests**
|
| 193 |
+
|
| 194 |
+
#### a2_rl_unitsyn_python_v3_402161
|
| 195 |
+
|
| 196 |
+
- **Running requests/engine**: avg=5.7, max=22
|
| 197 |
+
- **Waiting requests**: avg=0.0, max=0
|
| 198 |
+
- **Generation throughput/engine**: avg=110.0 tok/s, max=412.1 tok/s
|
| 199 |
+
- **KV cache usage**: avg=8.3%
|
| 200 |
+
- **Prefix cache hit rate**: avg=88.9%
|
| 201 |
+
- ⚠️ **Low throughput**: 110 tok/s << expected 1000+ tok/s for saturated 8B model
|
| 202 |
+
|
| 203 |
+
#### a2_rl_unitsyn_python_v3_402162
|
| 204 |
+
|
| 205 |
+
- **Running requests/engine**: avg=5.5, max=21
|
| 206 |
+
- **Waiting requests**: avg=0.0, max=0
|
| 207 |
+
- **Generation throughput/engine**: avg=111.2 tok/s, max=471.0 tok/s
|
| 208 |
+
- **KV cache usage**: avg=8.6%
|
| 209 |
+
- **Prefix cache hit rate**: avg=86.6%
|
| 210 |
+
- ⚠️ **Low throughput**: 111 tok/s << expected 1000+ tok/s for saturated 8B model
|
| 211 |
+
|
| 212 |
+
## Trial-Level Analysis (from result.json)
|
| 213 |
+
|
| 214 |
+
Total trials parsed: 7602
|
| 215 |
+
|
| 216 |
+
### Turn Count Statistics
|
| 217 |
+
|
| 218 |
+
| Metric | Value |
|
| 219 |
+
|--------|-------|
|
| 220 |
+
| Mean | 2.1 |
|
| 221 |
+
| Median | 2.0 |
|
| 222 |
+
| Std | 0.5 |
|
| 223 |
+
| Min | 1 |
|
| 224 |
+
| Max | 7 |
|
| 225 |
+
| Count | 7602 |
|
| 226 |
+
|
| 227 |
+
### Exception Distribution
|
| 228 |
+
|
| 229 |
+
| Exception Type | Count | % |
|
| 230 |
+
|---------------|-------|---|
|
| 231 |
+
| No exception | 6981 | 91.8% |
|
| 232 |
+
| AgentEnvironmentTimeoutError | 476 | 6.3% |
|
| 233 |
+
| DaytonaAuthenticationError | 109 | 1.4% |
|
| 234 |
+
| VerifierTimeoutError | 24 | 0.3% |
|
| 235 |
+
| CancelledError | 7 | 0.1% |
|
| 236 |
+
| VerifierRuntimeError | 3 | 0.0% |
|
| 237 |
+
| DaytonaError | 1 | 0.0% |
|
| 238 |
+
| AgentTimeoutError | 1 | 0.0% |
|
| 239 |
+
|
| 240 |
+
### Turn Count by Exception Type
|
| 241 |
+
|
| 242 |
+
| Exception Type | Mean Turns | Median Turns | Count |
|
| 243 |
+
|---------------|-----------|-------------|-------|
|
| 244 |
+
| AgentTimeoutError | 6.0 | 6.0 | 1 |
|
| 245 |
+
| VerifierTimeoutError | 2.4 | 2.0 | 24 |
|
| 246 |
+
| No exception | 2.2 | 2.0 | 6981 |
|
| 247 |
+
| DaytonaError | 2.0 | 2.0 | 1 |
|
| 248 |
+
| VerifierRuntimeError | 2.0 | 2.0 | 3 |
|
| 249 |
+
| AgentEnvironmentTimeoutError | 1.7 | 1.0 | 476 |
|
| 250 |
+
| DaytonaAuthenticationError | 1.1 | 1.0 | 109 |
|
| 251 |
+
| CancelledError | 1.0 | 1.0 | 7 |
|
| 252 |
+
|
| 253 |
+
### Turn Count by Outcome
|
| 254 |
+
|
| 255 |
+
| Outcome | Mean Turns | Median Turns | Count |
|
| 256 |
+
|---------|-----------|-------------|-------|
|
| 257 |
+
| Success | 2.1 | 2.0 | 3882 |
|
| 258 |
+
| Failure | 2.2 | 2.0 | 3286 |
|
| 259 |
+
|
| 260 |
+
### Reward Summary
|
| 261 |
+
|
| 262 |
+
- Mean reward: 0.5416
|
| 263 |
+
- Success rate: 54.2%
|
| 264 |
+
- Trials with reward data: 7168
|
| 265 |
+
|
training_logs/20260429_184357_metrics_table.csv
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
log_file,async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_DaytonaAuthenticationError,batch_errors/total_DaytonaAuthenticationError,batch_errors/avg_AgentEnvironmentTimeoutError,batch_errors/total_AgentEnvironmentTimeoutError,batch_errors/avg_VerifierRuntimeError,batch_errors/total_VerifierRuntimeError,batch_errors/avg_VerifierTimeoutError,batch_errors/total_VerifierTimeoutError,timing/save_hf_model,batch_errors/avg_DaytonaError,batch_errors/total_DaytonaError,global_step
|
| 2 |
+
a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,0,0.0,0,0.0,5121.6914,4576.6295,5769.2436,13905,1499,2503.5505,0.543,0.0021,0.1803,0.0,0.219,0.0,0.0,1.0,0.0,0.0323,0.6719,0.543,11.793,75.1959,379.3328,55.8,857.9687,478.6359,10.9023,0.1146,2.5571,72.0045,656.5065,728.9238,44.3353,6093.5913,52.8301,656.8044,5309.2801,0,1,128,1024,994,14,18,0.0859375,11.0,0.140625,18.0,0.0078125,1.0,,,,,,1
|
| 3 |
+
a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,1,1.0,1,1.0,5871.2051,5096.5576,6791.5128,15067,1,3402.3282,0.543,-0.0001,0.1554,-0.0002,0.2129,-0.0057,0.0,1.0,0.0,0.0274,0.6875,0.543,12.9813,75.3753,375.7304,56.2,857.9687,482.2383,0.0121,0.0559,2.7428,72.8696,707.4558,780.6952,29.1329,2730.7674,52.7333,707.7694,1894.5939,0,2,64,512,464,19,44,0.046875,3.0,0.6875,44.0,,,0.015625,1.0,,,,2
|
| 4 |
+
a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,2,2.0,2,1.0,5706.7969,4967.944,6318.9893,14787,1,3398.0642,0.4531,-0.0022,0.1487,0.0,0.198,0.0,0.0,1.0,0.0,0.0284,0.625,0.4531,13.4866,75.6917,379.7476,55.7,857.9687,478.2211,0.3416,0.0392,2.6589,73.1319,687.2284,760.7086,29.5563,2618.6945,52.8218,687.5371,1802.5023,0,3,64,512,491,9,20,0.015625,1.0,0.3125,20.0,,,,,,,,3
|
| 5 |
+
a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,3,3.0,3,1.0,5825.2578,4940.4704,6689.5483,14273,1,3209.1042,0.4941,-0.0017,0.0659,0.0,0.2102,0.0,0.0,1.0,0.0,0.0177,0.5312,0.4941,13.7198,75.7789,383.0292,55.4,857.9687,474.9395,0.3775,0.038,2.6718,77.6872,669.8871,747.9511,28.8753,2255.3097,53.5368,670.2257,1451.1474,0,4,64,512,456,20,40,0.09375,6.0,0.625,40.0,0.03125,2.0,0.125,8.0,,,,4
|
| 6 |
+
a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,4,3.75,3,1.0,5520.8398,5594.9188,5458.4856,16082,1,3484.2247,0.457,0.0055,0.1648,-0.0002,0.2057,-0.0064,0.0,1.0,0.0,0.0214,0.6094,0.457,14.6653,76.0574,385.3959,55.1,857.9687,472.5728,0.3196,0.044,2.9597,73.5587,681.9984,755.9482,29.4618,3542.6439,52.7498,682.3452,2730.9604,0,5,64,512,464,14,42,0.09375,6.0,0.65625,42.0,,,,,37.5898,,,5
|
| 7 |
+
a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,5,4.125,3,1.0,5461.6738,5218.1189,5769.8894,14552,1,3328.2592,0.5586,0.014,0.1023,-0.0002,0.1969,-0.0076,0.0,1.0,0.0,0.0179,0.7031,0.5586,15.0607,76.0109,385.4834,55.1,857.9687,472.4853,0.366,0.0393,2.7127,68.8947,678.4589,747.7171,29.9495,2213.735,54.8743,678.7827,1408.4036,0,6,64,512,474,15,28,0.125,8.0,0.4375,28.0,,,0.03125,2.0,,,,6
|
| 8 |
+
a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,6,4.6562,4,1.0,7798.1641,7124.3027,8179.4006,20083,1,3802.9833,0.3613,0.0039,0.1358,-0.0001,0.2261,-0.004,0.0,1.0,0.0,0.0185,0.5312,0.3613,18.1326,76.0253,380.9768,55.6,857.9687,476.9919,0.6987,0.0516,3.5227,93.4902,731.6962,825.5737,29.3423,4983.416,13.7041,732.0316,4140.5883,0,7,64,512,512,0,0,,,,,,,,,,,,7
|
| 9 |
+
a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,0,0.0,0,0.0,4865.5312,4540.8412,5507.3605,14128,1386,2616.8319,0.6641,-0.0047,0.0958,-0.0,0.206,-0.0,0.0,1.0,0.0,0.0163,0.7188,0.6641,18.3456,76.1459,384.4786,55.2,857.9687,473.4901,0.3173,0.0353,2.9251,73.3734,646.7222,720.5432,29.7617,5669.0399,52.8866,647.1342,4892.6495,1,8,64,512,470,16,39,0.046875,3.0,0.609375,39.0,,,,,,,,8
|
| 10 |
+
a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,1,1.0,1,1.0,5855.5039,5276.4774,6594.0844,13894,1,3271.3689,0.5605,0.0007,0.0512,-0.0,0.2096,-0.0006,0.0,1.0,0.0,0.0219,0.6406,0.5605,18.6552,76.4328,386.8148,54.9,857.9687,471.1539,0.3384,0.0375,2.7372,78.6677,660.2909,739.3591,30.4714,3239.5269,52.9167,660.6535,2444.512,1,9,64,512,432,25,70,0.125,8.0,1.078125,69.0,,,0.03125,2.0,,0.015625,1.0,9
|
| 11 |
+
a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,2,2.0,2,1.0,5375.2695,5344.5093,5397.7162,14823,1,3872.2484,0.4219,0.0135,0.113,-0.0002,0.1788,-0.007,0.0,1.0,0.0,0.0154,0.5469,0.4219,18.8019,76.4187,385.8892,55.0,857.9687,472.0795,0.3767,0.0413,2.7467,73.8308,700.3324,774.57,29.1637,3338.1958,53.0117,700.6976,2507.8644,1,10,64,512,463,23,37,0.140625,9.0,0.578125,37.0,,,0.046875,3.0,36.7892,,,10
|
| 12 |
+
a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,3,3.0,3,1.0,5192.8828,4679.4904,5726.7291,15138,1,3311.2403,0.5098,0.0084,0.0758,-0.0002,0.1873,-0.0056,0.0,1.0,0.0,0.0164,0.5625,0.5098,18.9517,76.4518,383.7386,55.3,857.9687,474.23,0.3045,0.0384,2.7622,74.4343,676.0857,750.8928,28.7197,2321.6045,57.2462,676.4197,1510.6996,1,11,64,512,437,26,61,0.140625,9.0,0.953125,61.0,,,0.078125,5.0,,,,11
|
| 13 |
+
a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,4,3.7969,3,1.0,5528.9668,5645.4653,5422.0674,16073,1,3727.4604,0.4785,0.0021,0.1344,-0.0001,0.1796,-0.0038,0.0,1.0,0.0,0.0198,0.6094,0.4785,19.0433,76.3939,383.2556,55.3,857.9687,474.7131,0.3795,0.0401,3.2865,78.4764,697.4553,776.3111,28.7288,2932.4204,52.7552,697.7943,2100.0414,1,12,16,128,110,8,15,0.0625,1.0,0.9375,15.0,,,0.125,2.0,,,,12
|
| 14 |
+
a2_rl_unitsyn_python_v3_402162,0.0,0,64,512,5,1.1406,0,0.2344,5434.5586,5246.1829,5770.3587,13053,1,3046.1974,0.6406,-0.003,0.059,0.0,0.189,0.0,0.0,1.0,0.0,0.0114,0.7308,0.6406,11.8048,75.1254,372.3459,56.6,857.9687,485.6227,12.7402,0.0478,2.4902,73.844,683.1484,757.335,46.1195,8330.7966,51.8296,683.4422,7519.1416,1,13,65,520,426,31,55,0.5846153846153846,38.0,0.8461538461538461,55.0,,,0.015384615384615385,1.0,,,,13
|
| 15 |
+
a2_rl_unitsyn_python_v3_402162,0.0,0,64,512,1,1.0,1,1.0,6345.9297,6347.6109,6344.6529,18003,1,4261.8702,0.4316,0.0217,0.1127,-0.0005,0.1793,-0.0156,0.0,1.0,0.0,0.0191,0.5625,0.4316,15.5447,75.3049,399.4698,53.4,857.9687,458.4989,0.3343,0.0742,3.1141,89.1038,721.9162,811.4036,33.5523,871.8394,57.2936,722.2251,0.0049,1,14,11,88,84,4,0,0.36363636363636365,4.0,,,,,,,,,,14
|
training_logs/20260429_184357_reward_vs_steps.png
ADDED
|
Git LFS Details
|
training_logs/20260429_184357_trial_results.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_logs/20260429_184357_turn_count_distribution.png
ADDED
|
Git LFS Details
|
training_logs/20260429_184357_vllm_metrics_a2_rl_unitsyn_python_v3_402161.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_logs/20260429_184357_vllm_metrics_a2_rl_unitsyn_python_v3_402162.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_logs/20260429_184357_vllm_metrics_table.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|