atutej committed on
Commit
c4b425f
·
verified ·
1 Parent(s): 5bef9fd

Add parsed training metrics and plots

Browse files
.gitattributes CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ training_logs/20260429_184357_reward_vs_steps.png filter=lfs diff=lfs merge=lfs -text
38
+ training_logs/20260429_184357_turn_count_distribution.png filter=lfs diff=lfs merge=lfs -text
training_logs/20260429_184357_metrics_a2_rl_unitsyn_python_v3_402161.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_DaytonaAuthenticationError,batch_errors/total_DaytonaAuthenticationError,batch_errors/avg_AgentEnvironmentTimeoutError,batch_errors/total_AgentEnvironmentTimeoutError,batch_errors/avg_VerifierRuntimeError,batch_errors/total_VerifierRuntimeError,batch_errors/avg_VerifierTimeoutError,batch_errors/total_VerifierTimeoutError,timing/save_hf_model,batch_errors/avg_DaytonaError,batch_errors/total_DaytonaError
2
+ 0.0,0,64,512,0,0.0,0,0.0,5121.6914,4576.6295,5769.2436,13905,1499,2503.5505,0.543,0.0021,0.1803,0.0,0.219,0.0,0.0,1.0,0.0,0.0323,0.6719,0.543,11.793,75.1959,379.3328,55.8,857.9687,478.6359,10.9023,0.1146,2.5571,72.0045,656.5065,728.9238,44.3353,6093.5913,52.8301,656.8044,5309.2801,0,1,128,1024,994,14,18,0.0859375,11.0,0.140625,18.0,0.0078125,1.0,,,,,
3
+ 0.0,0,64,512,1,1.0,1,1.0,5871.2051,5096.5576,6791.5128,15067,1,3402.3282,0.543,-0.0001,0.1554,-0.0002,0.2129,-0.0057,0.0,1.0,0.0,0.0274,0.6875,0.543,12.9813,75.3753,375.7304,56.2,857.9687,482.2383,0.0121,0.0559,2.7428,72.8696,707.4558,780.6952,29.1329,2730.7674,52.7333,707.7694,1894.5939,0,2,64,512,464,19,44,0.046875,3.0,0.6875,44.0,,,0.015625,1.0,,,
4
+ 0.0,0,64,512,2,2.0,2,1.0,5706.7969,4967.944,6318.9893,14787,1,3398.0642,0.4531,-0.0022,0.1487,0.0,0.198,0.0,0.0,1.0,0.0,0.0284,0.625,0.4531,13.4866,75.6917,379.7476,55.7,857.9687,478.2211,0.3416,0.0392,2.6589,73.1319,687.2284,760.7086,29.5563,2618.6945,52.8218,687.5371,1802.5023,0,3,64,512,491,9,20,0.015625,1.0,0.3125,20.0,,,,,,,
5
+ 0.0,0,64,512,3,3.0,3,1.0,5825.2578,4940.4704,6689.5483,14273,1,3209.1042,0.4941,-0.0017,0.0659,0.0,0.2102,0.0,0.0,1.0,0.0,0.0177,0.5312,0.4941,13.7198,75.7789,383.0292,55.4,857.9687,474.9395,0.3775,0.038,2.6718,77.6872,669.8871,747.9511,28.8753,2255.3097,53.5368,670.2257,1451.1474,0,4,64,512,456,20,40,0.09375,6.0,0.625,40.0,0.03125,2.0,0.125,8.0,,,
6
+ 0.0,0,64,512,4,3.75,3,1.0,5520.8398,5594.9188,5458.4856,16082,1,3484.2247,0.457,0.0055,0.1648,-0.0002,0.2057,-0.0064,0.0,1.0,0.0,0.0214,0.6094,0.457,14.6653,76.0574,385.3959,55.1,857.9687,472.5728,0.3196,0.044,2.9597,73.5587,681.9984,755.9482,29.4618,3542.6439,52.7498,682.3452,2730.9604,0,5,64,512,464,14,42,0.09375,6.0,0.65625,42.0,,,,,37.5898,,
7
+ 0.0,0,64,512,5,4.125,3,1.0,5461.6738,5218.1189,5769.8894,14552,1,3328.2592,0.5586,0.014,0.1023,-0.0002,0.1969,-0.0076,0.0,1.0,0.0,0.0179,0.7031,0.5586,15.0607,76.0109,385.4834,55.1,857.9687,472.4853,0.366,0.0393,2.7127,68.8947,678.4589,747.7171,29.9495,2213.735,54.8743,678.7827,1408.4036,0,6,64,512,474,15,28,0.125,8.0,0.4375,28.0,,,0.03125,2.0,,,
8
+ 0.0,0,64,512,6,4.6562,4,1.0,7798.1641,7124.3027,8179.4006,20083,1,3802.9833,0.3613,0.0039,0.1358,-0.0001,0.2261,-0.004,0.0,1.0,0.0,0.0185,0.5312,0.3613,18.1326,76.0253,380.9768,55.6,857.9687,476.9919,0.6987,0.0516,3.5227,93.4902,731.6962,825.5737,29.3423,4983.416,13.7041,732.0316,4140.5883,0,7,64,512,512,0,0,,,,,,,,,,,
9
+ 0.0,0,64,512,0,0.0,0,0.0,4865.5312,4540.8412,5507.3605,14128,1386,2616.8319,0.6641,-0.0047,0.0958,-0.0,0.206,-0.0,0.0,1.0,0.0,0.0163,0.7188,0.6641,18.3456,76.1459,384.4786,55.2,857.9687,473.4901,0.3173,0.0353,2.9251,73.3734,646.7222,720.5432,29.7617,5669.0399,52.8866,647.1342,4892.6495,1,8,64,512,470,16,39,0.046875,3.0,0.609375,39.0,,,,,,,
10
+ 0.0,0,64,512,1,1.0,1,1.0,5855.5039,5276.4774,6594.0844,13894,1,3271.3689,0.5605,0.0007,0.0512,-0.0,0.2096,-0.0006,0.0,1.0,0.0,0.0219,0.6406,0.5605,18.6552,76.4328,386.8148,54.9,857.9687,471.1539,0.3384,0.0375,2.7372,78.6677,660.2909,739.3591,30.4714,3239.5269,52.9167,660.6535,2444.512,1,9,64,512,432,25,70,0.125,8.0,1.078125,69.0,,,0.03125,2.0,,0.015625,1.0
11
+ 0.0,0,64,512,2,2.0,2,1.0,5375.2695,5344.5093,5397.7162,14823,1,3872.2484,0.4219,0.0135,0.113,-0.0002,0.1788,-0.007,0.0,1.0,0.0,0.0154,0.5469,0.4219,18.8019,76.4187,385.8892,55.0,857.9687,472.0795,0.3767,0.0413,2.7467,73.8308,700.3324,774.57,29.1637,3338.1958,53.0117,700.6976,2507.8644,1,10,64,512,463,23,37,0.140625,9.0,0.578125,37.0,,,0.046875,3.0,36.7892,,
12
+ 0.0,0,64,512,3,3.0,3,1.0,5192.8828,4679.4904,5726.7291,15138,1,3311.2403,0.5098,0.0084,0.0758,-0.0002,0.1873,-0.0056,0.0,1.0,0.0,0.0164,0.5625,0.5098,18.9517,76.4518,383.7386,55.3,857.9687,474.23,0.3045,0.0384,2.7622,74.4343,676.0857,750.8928,28.7197,2321.6045,57.2462,676.4197,1510.6996,1,11,64,512,437,26,61,0.140625,9.0,0.953125,61.0,,,0.078125,5.0,,,
13
+ 0.0,0,64,512,4,3.7969,3,1.0,5528.9668,5645.4653,5422.0674,16073,1,3727.4604,0.4785,0.0021,0.1344,-0.0001,0.1796,-0.0038,0.0,1.0,0.0,0.0198,0.6094,0.4785,19.0433,76.3939,383.2556,55.3,857.9687,474.7131,0.3795,0.0401,3.2865,78.4764,697.4553,776.3111,28.7288,2932.4204,52.7552,697.7943,2100.0414,1,12,16,128,110,8,15,0.0625,1.0,0.9375,15.0,,,0.125,2.0,,,
training_logs/20260429_184357_metrics_a2_rl_unitsyn_python_v3_402162.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_DaytonaAuthenticationError,batch_errors/total_DaytonaAuthenticationError,batch_errors/avg_AgentEnvironmentTimeoutError,batch_errors/total_AgentEnvironmentTimeoutError,batch_errors/avg_VerifierTimeoutError,batch_errors/total_VerifierTimeoutError
2
+ 0.0,0,64,512,5,1.1406,0,0.2344,5434.5586,5246.1829,5770.3587,13053,1,3046.1974,0.6406,-0.003,0.059,0.0,0.189,0.0,0.0,1.0,0.0,0.0114,0.7308,0.6406,11.8048,75.1254,372.3459,56.6,857.9687,485.6227,12.7402,0.0478,2.4902,73.844,683.1484,757.335,46.1195,8330.7966,51.8296,683.4422,7519.1416,1,13,65,520,426,31,55,0.5846153846153846,38,0.8461538461538461,55.0,0.015384615384615385,1.0
3
+ 0.0,0,64,512,1,1.0,1,1.0,6345.9297,6347.6109,6344.6529,18003,1,4261.8702,0.4316,0.0217,0.1127,-0.0005,0.1793,-0.0156,0.0,1.0,0.0,0.0191,0.5625,0.4316,15.5447,75.3049,399.4698,53.4,857.9687,458.4989,0.3343,0.0742,3.1141,89.1038,721.9162,811.4036,33.5523,871.8394,57.2936,722.2251,0.0049,1,14,11,88,84,4,0,0.36363636363636365,4,,,,
training_logs/20260429_184357_metrics_report.md ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SkyRL Training Metrics Analysis
2
+
3
+ Generated from 2 log files
4
+
5
+ ## Overview
6
+
7
+ | Log File | Total Steps | Metric Blocks | Final Reward (mean) | Final Reward (max) | Total Time (s) |
8
+ |----------|-------------|---------------|---------------------|-------------------|----------------|
9
+ | a2_rl_unitsyn_python_v3_402161 | 12 | 12 | 0.5037 | 0.6641 | 41938.9 |
10
+ | a2_rl_unitsyn_python_v3_402162 | 14 | 2 | 0.5361 | 0.6406 | 9202.6 |
11
+
12
+ ## Async Metrics
13
+
14
+ | | Mean | Std | Min | Max | Count |
15
+ |:------------------------------|-----------:|---------:|------:|---------:|--------:|
16
+ | async/discard_rate | 0 | 0 | 0 | 0 | 14 |
17
+ | async/discarded_count | 0 | 0 | 0 | 0 | 14 |
18
+ | async/effective_batch_groups | 64 | 0 | 64 | 64 | 14 |
19
+ | async/effective_batch_samples | 512 | 0 | 512 | 512 | 14 |
20
+ | async/staleness_max | 2.64286 | 1.94569 | 0 | 6 | 14 |
21
+ | async/staleness_mean | 2.17634 | 1.54834 | 0 | 4.6562 | 14 |
22
+ | async/staleness_min | 1.85714 | 1.35062 | 0 | 4 | 14 |
23
+ | async/staleness_ratio | 0.802457 | 0.396118 | 0 | 1 | 14 |
24
+
25
+ ## Generate Metrics
26
+
27
+ | | Mean | Std | Min | Max | Count |
28
+ |:-------------------------------------|----------:|---------:|---------:|---------:|--------:|
29
+ | generate/avg_num_tokens | 5707.45 | 705.472 | 4865.53 | 7798.16 | 14 |
30
+ | generate/avg_tokens_non_zero_rewards | 5328.54 | 701.545 | 4540.84 | 7124.3 | 14 |
31
+ | generate/avg_tokens_zero_rewards | 6124.29 | 768.378 | 5397.72 | 8179.4 | 14 |
32
+ | generate/max_num_tokens | 15275.8 | 1837.96 | 13053 | 20083 | 14 |
33
+ | generate/min_num_tokens | 206.929 | 523.93 | 1 | 1499 | 14 |
34
+ | generate/std_num_tokens | 3373.98 | 467.502 | 2503.55 | 4261.87 | 14 |
35
+
36
+ ## Loss Metrics
37
+
38
+ | | Mean | Std | Min | Max | Count |
39
+ |:----------------------------|---------:|-----------:|--------:|-------:|--------:|
40
+ | loss/avg_final_rewards | 0.508364 | 0.083743 | 0.3613 | 0.6641 | 14 |
41
+ | loss/avg_raw_advantages | 0.0043 | 0.00761799 | -0.0047 | 0.0217 | 14 |
42
+ | loss/avg_raw_advantages_abs | 0.113936 | 0.041136 | 0.0512 | 0.1803 | 14 |
43
+
44
+ ## Policy Metrics
45
+
46
+ | | Mean | Std | Min | Max | Count |
47
+ |:---------------------------|-------------:|------------:|--------:|-------:|--------:|
48
+ | policy/final_loss | -0.000121429 | 0.000142389 | -0.0005 | 0 | 14 |
49
+ | policy/policy_entropy | 0.199886 | 0.0153501 | 0.1788 | 0.2261 | 14 |
50
+ | policy/policy_loss | -0.00402143 | 0.00446631 | -0.0156 | 0 | 14 |
51
+ | policy/policy_lr | 0 | 0 | 0 | 0 | 14 |
52
+ | policy/policy_update_steps | 1 | 0 | 1 | 1 | 14 |
53
+ | policy/ppo_clip_ratio | 0 | 0 | 0 | 0 | 14 |
54
+ | policy/raw_grad_norm | 0.0202786 | 0.00565457 | 0.0114 | 0.0323 | 14 |
55
+
56
+ ## Reward Metrics
57
+
58
+ | | Mean | Std | Min | Max | Count |
59
+ |:----------------------|---------:|----------:|-------:|-------:|--------:|
60
+ | reward/avg_pass_at_8 | 0.623629 | 0.0703892 | 0.5312 | 0.7308 | 14 |
61
+ | reward/avg_raw_reward | 0.508364 | 0.083743 | 0.3613 | 0.6641 | 14 |
62
+
63
+ ## System Metrics
64
+
65
+ | | Mean | Std | Min | Max | Count |
66
+ |:------------------------|---------:|------------:|---------:|---------:|--------:|
67
+ | system/process_rss_gb | 15.7847 | 2.78836 | 11.793 | 19.0433 | 14 |
68
+ | system/process_vms_gb | 75.8863 | 0.480004 | 75.1254 | 76.4518 | 14 |
69
+ | system/ram_available_gb | 383.263 | 6.21639 | 372.346 | 399.47 | 14 |
70
+ | system/ram_percent | 55.3286 | 0.731099 | 53.4 | 56.6 | 14 |
71
+ | system/ram_total_gb | 857.969 | 2.35957e-13 | 857.969 | 857.969 | 14 |
72
+ | system/ram_used_gb | 474.705 | 6.21637 | 458.499 | 485.623 | 14 |
73
+
74
+ ## Timing Metrics
75
+
76
+ | | Mean | Std | Min | Max | Count |
77
+ |:--------------------------------------|-----------:|-------------:|---------:|----------:|--------:|
78
+ | timing/cleanup_old_checkpoints | 1.98634 | 4.18445 | 0.0121 | 12.7402 | 14 |
79
+ | timing/compute_advantages_and_returns | 0.0498 | 0.0212895 | 0.0353 | 0.1146 | 14 |
80
+ | timing/convert_to_training_input | 2.84912 | 0.287621 | 2.4902 | 3.5227 | 14 |
81
+ | timing/fwd_logprobs_values_reward | 76.6691 | 6.77429 | 68.8947 | 93.4902 | 14 |
82
+ | timing/policy_train | 685.656 | 24.3175 | 646.722 | 731.696 | 14 |
83
+ | timing/run_training | 762.709 | 29.2176 | 720.543 | 825.574 | 14 |
84
+ | timing/save_checkpoints | 31.9408 | 5.76727 | 28.7197 | 46.1195 | 14 |
85
+ | timing/step | 3652.97 | 1959.73 | 871.839 | 8330.8 | 14 |
86
+ | timing/sync_weights | 50.7993 | 10.807 | 13.7041 | 57.2936 | 14 |
87
+ | timing/train_critic_and_policy | 685.99 | 24.3066 | 647.134 | 732.032 | 14 |
88
+ | timing/wait_for_generation_buffer | 2836.6 | 1971.02 | 0.0049 | 7519.14 | 14 |
89
+ | timing/save_hf_model | 37.1895 | 0.56611 | 36.7892 | 37.5898 | 2 |
90
+
91
+ ## Trainer Metrics
92
+
93
+ | | Mean | Std | Min | Max | Count |
94
+ |:--------------------|-------:|---------:|------:|------:|--------:|
95
+ | trainer/epoch | 0.5 | 0.518875 | 0 | 1 | 14 |
96
+ | trainer/global_step | 7.5 | 4.1833 | 1 | 14 | 14 |
97
+
98
+ ## Batch Errors Metrics
99
+
100
+ | | Mean | Std | Min | Max | Count |
101
+ |:------------------------------------------------|------------:|------------:|-----------:|------------:|--------:|
102
+ | batch_errors/total_batches | 61.4286 | 26.4828 | 11 | 128 | 14 |
103
+ | batch_errors/total_instances | 491.429 | 211.863 | 88 | 1024 | 14 |
104
+ | batch_errors/total_successful | 448.357 | 206.364 | 84 | 994 | 14 |
105
+ | batch_errors/total_failed | 16 | 8.77935 | 0 | 31 | 14 |
106
+ | batch_errors/total_masked | 33.5 | 21.2268 | 0 | 70 | 14 |
107
+ | batch_errors/avg_DaytonaAuthenticationError | 0.148063 | 0.156344 | 0.015625 | 0.584615 | 13 |
108
+ | batch_errors/total_DaytonaAuthenticationError | 8.23077 | 9.49696 | 1 | 38 | 13 |
109
+ | batch_errors/avg_AgentEnvironmentTimeoutError | 0.655148 | 0.273596 | 0.140625 | 1.07812 | 12 |
110
+ | batch_errors/total_AgentEnvironmentTimeoutError | 39 | 16.9973 | 15 | 69 | 12 |
111
+ | batch_errors/avg_VerifierRuntimeError | 0.0195312 | 0.0165728 | 0.0078125 | 0.03125 | 2 |
112
+ | batch_errors/total_VerifierRuntimeError | 1.5 | 0.707107 | 1 | 2 | 2 |
113
+ | batch_errors/avg_VerifierTimeoutError | 0.0585637 | 0.0455868 | 0.0153846 | 0.125 | 8 |
114
+ | batch_errors/total_VerifierTimeoutError | 3 | 2.39046 | 1 | 8 | 8 |
115
+ | batch_errors/avg_DaytonaError | 0.015625 | nan | 0.015625 | 0.015625 | 1 |
116
+ | batch_errors/total_DaytonaError | 1 | nan | 1 | 1 | 1 |
117
+
118
+ ## Training Progression by Log
119
+
120
+ ### a2_rl_unitsyn_python_v3_402161
121
+
122
+ | Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
123
+ |------|--------|--------|-----|------|---------------|-------------|
124
+ | 1 | 0.5430 | 0.6719 | 0.000000 | 0.0000 | 6093.6 | 5309.3 |
125
+ | 2 | 0.5430 | 0.6875 | 0.000000 | -0.0002 | 2730.8 | 1894.6 |
126
+ | 3 | 0.4531 | 0.6250 | 0.000000 | 0.0000 | 2618.7 | 1802.5 |
127
+ | 4 | 0.4941 | 0.5312 | 0.000000 | 0.0000 | 2255.3 | 1451.1 |
128
+ | 5 | 0.4570 | 0.6094 | 0.000000 | -0.0002 | 3542.6 | 2731.0 |
129
+ | 6 | 0.5586 | 0.7031 | 0.000000 | -0.0002 | 2213.7 | 1408.4 |
130
+ | 7 | 0.3613 | 0.5312 | 0.000000 | -0.0001 | 4983.4 | 4140.6 |
131
+ | 8 | 0.6641 | 0.7188 | 0.000000 | -0.0000 | 5669.0 | 4892.6 |
132
+ | 9 | 0.5605 | 0.6406 | 0.000000 | -0.0000 | 3239.5 | 2444.5 |
133
+ | 10 | 0.4219 | 0.5469 | 0.000000 | -0.0002 | 3338.2 | 2507.9 |
134
+ | 11 | 0.5098 | 0.5625 | 0.000000 | -0.0002 | 2321.6 | 1510.7 |
135
+ | 12 | 0.4785 | 0.6094 | 0.000000 | -0.0001 | 2932.4 | 2100.0 |
136
+
137
+ ### a2_rl_unitsyn_python_v3_402162
138
+
139
+ | Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
140
+ |------|--------|--------|-----|------|---------------|-------------|
141
+ | 13 | 0.6406 | 0.7308 | 0.000000 | 0.0000 | 8330.8 | 7519.1 |
142
+ | 14 | 0.4316 | 0.5625 | 0.000000 | -0.0005 | 871.8 | 0.0 |
143
+
144
+ ## Timing Analysis
145
+
146
+ ### Average Time Breakdown (% of step time)
147
+
148
+ | Component | Avg % of Step Time |
149
+ |-----------|-------------------|
150
+ | wait_for_generation_buffer | 69.8% |
151
+ | run_training | 28.2% |
152
+ | train_critic_and_policy | 25.3% |
153
+ | policy_train | 25.3% |
154
+ | fwd_logprobs_values_reward | 2.9% |
155
+ | sync_weights | 1.9% |
156
+ | save_checkpoints | 1.1% |
157
+ | save_hf_model | 1.1% |
158
+ | convert_to_training_input | 0.1% |
159
+ | cleanup_old_checkpoints | 0.0% |
160
+ | compute_advantages_and_returns | 0.0% |
161
+
162
+ ## Cross-Log Comparison
163
+
164
+ | Log | Avg Reward | Pass@8 | Step Time (s) | Gen Wait Time (s) | Avg Tokens | Staleness |
165
+ |-----|------|------|------|------|------|------|
166
+ | a2_rl_unitsyn_python_v3_402161 | 0.5037 | 0.6198 | 3494.9121 | 2682.7702 | 5676.9819 | 2.3607 |
167
+ | a2_rl_unitsyn_python_v3_402162 | 0.5361 | 0.6466 | 4601.3180 | 3759.5732 | 5890.2442 | 1.0703 |
168
+
169
+ ## vLLM Inference Engine Analysis
170
+
171
+ Metrics from vLLM stat loggers (V1LoggingStatLoggerFixed).
172
+
173
+ > **Note**: Ray deduplicates similar log messages with `[repeated Nx across cluster]`,
174
+ > so we typically capture stats from one engine per timestamp. The stats shown are
175
+ > **per-engine** values. Multiply by `num_inference_engines` for cluster-wide estimates.
176
+
177
+ ### Summary by Log (Per-Engine Stats)
178
+
179
+ | Log | Avg Running/Engine | Avg Waiting/Engine | Avg Gen Throughput/Engine | Avg KV Cache % | Avg Prefix Hit % |
180
+ |-----|-------------------|-------------------|--------------------------|----------------|------------------|
181
+ | a2_rl_unitsyn_python_v3_402161 | 5.7 | 0.0 | 110.0 tok/s | 8.3% | 88.9% |
182
+ | a2_rl_unitsyn_python_v3_402162 | 5.5 | 0.0 | 111.2 tok/s | 8.6% | 86.6% |
183
+
184
+ ### Utilization Analysis (Per-Engine)
185
+
186
+ Key indicators of inference engine utilization:
187
+
188
+ - **Running requests/engine**: Concurrent requests being processed by each engine
189
+ - **Waiting requests**: Requests queued (0 = engine not saturated, has spare capacity)
190
+ - **Generation throughput**: Decode tokens/sec per engine
191
+ - 8B model on H100 can do **1000+ tok/s** when saturated
192
+ - If throughput is below 300 tok/s with 0 requests waiting, the engine is **starved for requests**
193
+
194
+ #### a2_rl_unitsyn_python_v3_402161
195
+
196
+ - **Running requests/engine**: avg=5.7, max=22
197
+ - **Waiting requests**: avg=0.0, max=0
198
+ - **Generation throughput/engine**: avg=110.0 tok/s, max=412.1 tok/s
199
+ - **KV cache usage**: avg=8.3%
200
+ - **Prefix cache hit rate**: avg=88.9%
201
+ - ⚠️ **Low throughput**: 110 tok/s << expected 1000+ tok/s for saturated 8B model
202
+
203
+ #### a2_rl_unitsyn_python_v3_402162
204
+
205
+ - **Running requests/engine**: avg=5.5, max=21
206
+ - **Waiting requests**: avg=0.0, max=0
207
+ - **Generation throughput/engine**: avg=111.2 tok/s, max=471.0 tok/s
208
+ - **KV cache usage**: avg=8.6%
209
+ - **Prefix cache hit rate**: avg=86.6%
210
+ - ⚠️ **Low throughput**: 111 tok/s << expected 1000+ tok/s for saturated 8B model
211
+
212
+ ## Trial-Level Analysis (from result.json)
213
+
214
+ Total trials parsed: 7602
215
+
216
+ ### Turn Count Statistics
217
+
218
+ | Metric | Value |
219
+ |--------|-------|
220
+ | Mean | 2.1 |
221
+ | Median | 2.0 |
222
+ | Std | 0.5 |
223
+ | Min | 1 |
224
+ | Max | 7 |
225
+ | Count | 7602 |
226
+
227
+ ### Exception Distribution
228
+
229
+ | Exception Type | Count | % |
230
+ |---------------|-------|---|
231
+ | No exception | 6981 | 91.8% |
232
+ | AgentEnvironmentTimeoutError | 476 | 6.3% |
233
+ | DaytonaAuthenticationError | 109 | 1.4% |
234
+ | VerifierTimeoutError | 24 | 0.3% |
235
+ | CancelledError | 7 | 0.1% |
236
+ | VerifierRuntimeError | 3 | 0.0% |
237
+ | DaytonaError | 1 | 0.0% |
238
+ | AgentTimeoutError | 1 | 0.0% |
239
+
240
+ ### Turn Count by Exception Type
241
+
242
+ | Exception Type | Mean Turns | Median Turns | Count |
243
+ |---------------|-----------|-------------|-------|
244
+ | AgentTimeoutError | 6.0 | 6.0 | 1 |
245
+ | VerifierTimeoutError | 2.4 | 2.0 | 24 |
246
+ | No exception | 2.2 | 2.0 | 6981 |
247
+ | DaytonaError | 2.0 | 2.0 | 1 |
248
+ | VerifierRuntimeError | 2.0 | 2.0 | 3 |
249
+ | AgentEnvironmentTimeoutError | 1.7 | 1.0 | 476 |
250
+ | DaytonaAuthenticationError | 1.1 | 1.0 | 109 |
251
+ | CancelledError | 1.0 | 1.0 | 7 |
252
+
253
+ ### Turn Count by Outcome
254
+
255
+ | Outcome | Mean Turns | Median Turns | Count |
256
+ |---------|-----------|-------------|-------|
257
+ | Success | 2.1 | 2.0 | 3882 |
258
+ | Failure | 2.2 | 2.0 | 3286 |
259
+
260
+ ### Reward Summary
261
+
262
+ - Mean reward: 0.5416
263
+ - Success rate: 54.2%
264
+ - Trials with reward data: 7168
265
+
training_logs/20260429_184357_metrics_table.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_file,async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_DaytonaAuthenticationError,batch_errors/total_DaytonaAuthenticationError,batch_errors/avg_AgentEnvironmentTimeoutError,batch_errors/total_AgentEnvironmentTimeoutError,batch_errors/avg_VerifierRuntimeError,batch_errors/total_VerifierRuntimeError,batch_errors/avg_VerifierTimeoutError,batch_errors/total_VerifierTimeoutError,timing/save_hf_model,batch_errors/avg_DaytonaError,batch_errors/total_DaytonaError,global_step
2
+ a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,0,0.0,0,0.0,5121.6914,4576.6295,5769.2436,13905,1499,2503.5505,0.543,0.0021,0.1803,0.0,0.219,0.0,0.0,1.0,0.0,0.0323,0.6719,0.543,11.793,75.1959,379.3328,55.8,857.9687,478.6359,10.9023,0.1146,2.5571,72.0045,656.5065,728.9238,44.3353,6093.5913,52.8301,656.8044,5309.2801,0,1,128,1024,994,14,18,0.0859375,11.0,0.140625,18.0,0.0078125,1.0,,,,,,1
3
+ a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,1,1.0,1,1.0,5871.2051,5096.5576,6791.5128,15067,1,3402.3282,0.543,-0.0001,0.1554,-0.0002,0.2129,-0.0057,0.0,1.0,0.0,0.0274,0.6875,0.543,12.9813,75.3753,375.7304,56.2,857.9687,482.2383,0.0121,0.0559,2.7428,72.8696,707.4558,780.6952,29.1329,2730.7674,52.7333,707.7694,1894.5939,0,2,64,512,464,19,44,0.046875,3.0,0.6875,44.0,,,0.015625,1.0,,,,2
4
+ a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,2,2.0,2,1.0,5706.7969,4967.944,6318.9893,14787,1,3398.0642,0.4531,-0.0022,0.1487,0.0,0.198,0.0,0.0,1.0,0.0,0.0284,0.625,0.4531,13.4866,75.6917,379.7476,55.7,857.9687,478.2211,0.3416,0.0392,2.6589,73.1319,687.2284,760.7086,29.5563,2618.6945,52.8218,687.5371,1802.5023,0,3,64,512,491,9,20,0.015625,1.0,0.3125,20.0,,,,,,,,3
5
+ a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,3,3.0,3,1.0,5825.2578,4940.4704,6689.5483,14273,1,3209.1042,0.4941,-0.0017,0.0659,0.0,0.2102,0.0,0.0,1.0,0.0,0.0177,0.5312,0.4941,13.7198,75.7789,383.0292,55.4,857.9687,474.9395,0.3775,0.038,2.6718,77.6872,669.8871,747.9511,28.8753,2255.3097,53.5368,670.2257,1451.1474,0,4,64,512,456,20,40,0.09375,6.0,0.625,40.0,0.03125,2.0,0.125,8.0,,,,4
6
+ a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,4,3.75,3,1.0,5520.8398,5594.9188,5458.4856,16082,1,3484.2247,0.457,0.0055,0.1648,-0.0002,0.2057,-0.0064,0.0,1.0,0.0,0.0214,0.6094,0.457,14.6653,76.0574,385.3959,55.1,857.9687,472.5728,0.3196,0.044,2.9597,73.5587,681.9984,755.9482,29.4618,3542.6439,52.7498,682.3452,2730.9604,0,5,64,512,464,14,42,0.09375,6.0,0.65625,42.0,,,,,37.5898,,,5
7
+ a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,5,4.125,3,1.0,5461.6738,5218.1189,5769.8894,14552,1,3328.2592,0.5586,0.014,0.1023,-0.0002,0.1969,-0.0076,0.0,1.0,0.0,0.0179,0.7031,0.5586,15.0607,76.0109,385.4834,55.1,857.9687,472.4853,0.366,0.0393,2.7127,68.8947,678.4589,747.7171,29.9495,2213.735,54.8743,678.7827,1408.4036,0,6,64,512,474,15,28,0.125,8.0,0.4375,28.0,,,0.03125,2.0,,,,6
8
+ a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,6,4.6562,4,1.0,7798.1641,7124.3027,8179.4006,20083,1,3802.9833,0.3613,0.0039,0.1358,-0.0001,0.2261,-0.004,0.0,1.0,0.0,0.0185,0.5312,0.3613,18.1326,76.0253,380.9768,55.6,857.9687,476.9919,0.6987,0.0516,3.5227,93.4902,731.6962,825.5737,29.3423,4983.416,13.7041,732.0316,4140.5883,0,7,64,512,512,0,0,,,,,,,,,,,,7
9
+ a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,0,0.0,0,0.0,4865.5312,4540.8412,5507.3605,14128,1386,2616.8319,0.6641,-0.0047,0.0958,-0.0,0.206,-0.0,0.0,1.0,0.0,0.0163,0.7188,0.6641,18.3456,76.1459,384.4786,55.2,857.9687,473.4901,0.3173,0.0353,2.9251,73.3734,646.7222,720.5432,29.7617,5669.0399,52.8866,647.1342,4892.6495,1,8,64,512,470,16,39,0.046875,3.0,0.609375,39.0,,,,,,,,8
10
+ a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,1,1.0,1,1.0,5855.5039,5276.4774,6594.0844,13894,1,3271.3689,0.5605,0.0007,0.0512,-0.0,0.2096,-0.0006,0.0,1.0,0.0,0.0219,0.6406,0.5605,18.6552,76.4328,386.8148,54.9,857.9687,471.1539,0.3384,0.0375,2.7372,78.6677,660.2909,739.3591,30.4714,3239.5269,52.9167,660.6535,2444.512,1,9,64,512,432,25,70,0.125,8.0,1.078125,69.0,,,0.03125,2.0,,0.015625,1.0,9
11
+ a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,2,2.0,2,1.0,5375.2695,5344.5093,5397.7162,14823,1,3872.2484,0.4219,0.0135,0.113,-0.0002,0.1788,-0.007,0.0,1.0,0.0,0.0154,0.5469,0.4219,18.8019,76.4187,385.8892,55.0,857.9687,472.0795,0.3767,0.0413,2.7467,73.8308,700.3324,774.57,29.1637,3338.1958,53.0117,700.6976,2507.8644,1,10,64,512,463,23,37,0.140625,9.0,0.578125,37.0,,,0.046875,3.0,36.7892,,,10
12
+ a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,3,3.0,3,1.0,5192.8828,4679.4904,5726.7291,15138,1,3311.2403,0.5098,0.0084,0.0758,-0.0002,0.1873,-0.0056,0.0,1.0,0.0,0.0164,0.5625,0.5098,18.9517,76.4518,383.7386,55.3,857.9687,474.23,0.3045,0.0384,2.7622,74.4343,676.0857,750.8928,28.7197,2321.6045,57.2462,676.4197,1510.6996,1,11,64,512,437,26,61,0.140625,9.0,0.953125,61.0,,,0.078125,5.0,,,,11
13
+ a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,4,3.7969,3,1.0,5528.9668,5645.4653,5422.0674,16073,1,3727.4604,0.4785,0.0021,0.1344,-0.0001,0.1796,-0.0038,0.0,1.0,0.0,0.0198,0.6094,0.4785,19.0433,76.3939,383.2556,55.3,857.9687,474.7131,0.3795,0.0401,3.2865,78.4764,697.4553,776.3111,28.7288,2932.4204,52.7552,697.7943,2100.0414,1,12,16,128,110,8,15,0.0625,1.0,0.9375,15.0,,,0.125,2.0,,,,12
14
+ a2_rl_unitsyn_python_v3_402162,0.0,0,64,512,5,1.1406,0,0.2344,5434.5586,5246.1829,5770.3587,13053,1,3046.1974,0.6406,-0.003,0.059,0.0,0.189,0.0,0.0,1.0,0.0,0.0114,0.7308,0.6406,11.8048,75.1254,372.3459,56.6,857.9687,485.6227,12.7402,0.0478,2.4902,73.844,683.1484,757.335,46.1195,8330.7966,51.8296,683.4422,7519.1416,1,13,65,520,426,31,55,0.5846153846153846,38.0,0.8461538461538461,55.0,,,0.015384615384615385,1.0,,,,13
15
+ a2_rl_unitsyn_python_v3_402162,0.0,0,64,512,1,1.0,1,1.0,6345.9297,6347.6109,6344.6529,18003,1,4261.8702,0.4316,0.0217,0.1127,-0.0005,0.1793,-0.0156,0.0,1.0,0.0,0.0191,0.5625,0.4316,15.5447,75.3049,399.4698,53.4,857.9687,458.4989,0.3343,0.0742,3.1141,89.1038,721.9162,811.4036,33.5523,871.8394,57.2936,722.2251,0.0049,1,14,11,88,84,4,0,0.36363636363636365,4.0,,,,,,,,,,14
training_logs/20260429_184357_reward_vs_steps.png ADDED

Git LFS Details

  • SHA256: b02fc1b1e4e650eeb87f0d0987a2b57a4c90d0c0e41d359207a01f4e8bdc3417
  • Pointer size: 131 Bytes
  • Size of remote file: 151 kB
training_logs/20260429_184357_trial_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/20260429_184357_turn_count_distribution.png ADDED

Git LFS Details

  • SHA256: 401a5863733e2067ea1fe3fe05c634f6893bb68c410018c8d0e718561431f766
  • Pointer size: 131 Bytes
  • Size of remote file: 109 kB
training_logs/20260429_184357_vllm_metrics_a2_rl_unitsyn_python_v3_402161.csv ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/20260429_184357_vllm_metrics_a2_rl_unitsyn_python_v3_402162.csv ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/20260429_184357_vllm_metrics_table.csv ADDED
The diff for this file is too large to render. See raw diff