atutej committed on
Commit
9eac5bc
·
verified ·
1 Parent(s): 5d5ae45

Add parsed training metrics and plots

Browse files
.gitattributes CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ training_logs/20260428_191936_reward_vs_steps.png filter=lfs diff=lfs merge=lfs -text
38
+ training_logs/20260428_191936_turn_count_distribution.png filter=lfs diff=lfs merge=lfs -text
training_logs/20260428_191936_metrics_a2_rl_stack_php_v2_400876.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_DaytonaAuthenticationError,batch_errors/total_DaytonaAuthenticationError,batch_errors/avg_AgentEnvironmentTimeoutError,batch_errors/total_AgentEnvironmentTimeoutError,timing/save_hf_model,batch_errors/avg_DaytonaError,batch_errors/total_DaytonaError
2
+ 0.0,0,64,512,0,0.0,0,0.0,5173.5488,5581.1377,364.0,16498,1,3153.1045,0.9219,0.0054,0.0054,-0.0001,0.1612,-0.0039,0.0,1.0,0.0,0.0019,1.0,0.9219,13.9041,75.2886,363.3583,57.6,857.9687,494.6104,13.0164,0.1382,3.3894,86.136,726.5471,813.1269,45.5888,8812.9184,51.6468,726.8524,7944.7551,0,1,128,1024,896,58,126,0.015625,2.0,0.984375,126.0,,,
3
+ 0.0,0,64,512,1,1.0,1,1.0,4930.5762,5967.7683,1.0,17443,1,3568.3347,0.8262,0.0,0.0,0.0,0.1245,0.0,0.0,1.0,0.0,0.0,1.0,0.8262,15.6279,75.6176,360.3593,58.0,857.9687,497.6094,0.0131,0.0414,3.5316,74.6536,709.0029,784.0235,27.6606,3571.7276,54.3544,709.3282,2729.8153,0,2,64,512,459,21,53,,,0.828125,53.0,,,
4
+ 0.0,0,64,512,2,2.0,2,1.0,5124.7324,5696.0591,377.5273,15485,1,3123.3346,0.8926,0.0,0.0,0.0,0.1246,0.0,0.0,1.0,0.0,0.0,1.0,0.8926,15.9937,75.74,358.4,58.2,857.9687,499.5687,0.3129,0.0457,3.1477,83.4744,695.0441,778.8675,28.7517,3180.0156,51.0655,695.3471,2346.9327,0,3,64,512,472,20,40,,,0.625,40.0,,,
5
+ 0.0,0,64,512,3,2.9844,2,1.0,5176.248,5614.8284,1.0,16890,1,2922.3081,0.9219,0.0,0.0,0.0,0.115,0.0,0.0,1.0,0.0,0.0,1.0,0.9219,16.2626,75.8032,361.1512,57.9,857.9687,496.8175,0.401,0.0417,3.3777,74.1559,668.7475,743.2802,28.5269,3353.6926,50.6787,669.0823,2556.3534,0,4,64,512,473,21,39,,,0.609375,39.0,,,
6
+ 0.0,0,64,512,4,3.9375,3,1.0,5691.5176,6149.7648,284.2,26038,1,3437.1652,0.9219,-0.0008,0.0069,-0.0,0.1112,-0.0,0.0,1.0,0.0,0.0032,1.0,0.9219,21.1017,75.9924,362.8714,57.7,857.9687,495.0973,0.3297,0.06,4.5482,86.0846,752.4833,838.9829,28.3162,3308.0503,52.1042,752.838,2412.3879,0,5,64,512,505,5,7,,,0.09375,6.0,37.3861,0.015625,1.0
7
+ 0.0,0,64,512,5,4.2031,3,1.0,5741.5156,5720.9058,6532.6154,20082,1,3555.0253,0.9746,0.0,0.0,0.0,0.1169,0.0,0.0,1.0,0.0,0.0,0.9844,0.9746,21.72,75.7697,361.731,57.8,857.9687,496.2377,0.2795,0.0506,3.6853,98.4083,777.2451,876.008,32.4554,2692.5491,51.9383,777.5488,1760.8846,0,6,64,512,512,0,0,,,,,,,
8
+ 0.0,0,64,512,6,4.5781,4,1.0,10864.9395,10819.0275,18654.6667,31356,1397,9146.1479,0.9941,0.0009,0.0152,-0.0,0.1127,-0.0,0.0,1.0,0.0,0.0038,1.0,0.9941,26.2239,76.3718,428.1846,50.1,857.9687,429.7841,0.3285,0.1094,5.588,221.3386,1426.9392,1648.7818,33.3086,4928.9777,13.5134,1427.3335,3261.0614,0,7,64,512,512,0,0,,,,,,,
9
+ 0.0,0,64,512,0,0.0,0,0.0,4562.1953,4562.1953,0.0,16694,1315,2063.5003,1.0,0.0,0.0,0.0,0.1111,0.0,0.0,1.0,0.0,0.0,1.0,1.0,26.1423,76.4117,412.7117,51.9,857.9687,445.257,0.3459,0.0436,3.2475,57.5811,610.4907,668.4088,36.6974,7714.896,51.2285,610.7838,6991.9583,1,8,64,512,494,13,18,,,0.28125,18.0,,,
10
+ 0.0,0,64,512,1,1.0,1,1.0,6491.9609,6718.9148,603.1053,18269,1,3405.058,0.9629,0.0001,0.007,-0.0,0.1099,-0.0,0.0,1.0,0.0,0.0038,1.0,0.9629,26.5811,76.6696,407.6129,52.5,857.9687,450.3558,0.3266,0.0501,3.7625,93.6707,733.3147,827.3766,33.7296,2416.6198,53.5177,733.6554,1531.9602,1,9,64,512,479,20,33,,,0.515625,33.0,,,
training_logs/20260428_191936_metrics_a2_rl_stack_php_v2_400877.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/save_hf_model,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_AgentEnvironmentTimeoutError,batch_errors/total_AgentEnvironmentTimeoutError,batch_errors/avg_DaytonaError,batch_errors/total_DaytonaError
2
+ 0.0,0,64,512,2,0.4375,0,0.2188,4768.9824,4805.7866,1665.1667,18141,1,2456.2799,0.9883,-0.0007,0.0075,0.0,0.1119,0.0,0.0,1.0,0.0,0.0033,1.0,0.9883,15.2558,75.1672,370.8639,56.8,857.9687,487.1048,8.2923,0.0521,3.272,74.7888,683.9612,759.0926,40.7566,42.2577,7720.7893,64.0786,684.2511,6894.3459,1,10,64,512,470,22,42,0.65625,42.0,,
3
+ 0.0,0,64,512,1,1.0,1,1.0,6043.1543,6529.5032,1231.4043,14905,1,3379.2364,0.9082,0.001,0.0085,0.0,0.1094,0.0,0.0,1.0,0.0,0.0039,1.0,0.9082,15.4861,75.2612,369.6674,56.9,857.9687,488.3013,0.2726,0.0386,3.2801,86.1438,735.4492,822.0201,33.6901,,1944.634,52.3307,735.837,1066.9959,1,11,64,512,477,20,35,0.546875,35.0,,
4
+ 0.0,0,64,512,2,2.0,2,1.0,5200.8203,5582.3585,1.0,18761,1,3026.5222,0.9316,0.0,0.0,0.0,0.1061,0.0,0.0,1.0,0.0,0.0,1.0,0.9316,15.956,75.5988,372.0403,56.6,857.9687,485.9283,0.315,0.0443,3.8178,83.3515,683.5771,767.262,34.4038,,3377.1913,53.2947,683.8658,2552.7951,1,12,64,512,500,6,12,0.1875,12.0,,
5
+ 0.0,0,64,512,3,3.0,3,1.0,5112.4043,5205.6962,2021.3333,20402,1,2908.5829,0.9707,-0.0012,0.0154,-0.0,0.1116,-0.0,0.0,1.0,0.0,0.0057,1.0,0.9707,18.1414,75.7131,370.1971,56.9,857.9687,487.7716,0.3715,0.0475,3.4987,78.1533,686.8594,765.3441,34.4024,,2811.1464,52.3944,687.143,1989.8831,1,13,64,512,506,5,6,0.078125,5.0,0.015625,1.0
6
+ 0.0,0,64,512,4,4.0,4,1.0,9612.0879,9534.167,12193.8667,30017,1,7895.0271,0.9707,-0.0011,0.0202,-0.0,0.1125,-0.0,0.0,1.0,0.0,0.0025,0.9844,0.9707,25.5219,76.0509,411.2549,52.1,857.9687,446.7137,0.2489,0.0637,5.1805,202.8086,1219.5955,1422.8569,33.357,,3814.0265,48.8945,1219.9843,2337.0674,1,14,12,96,96,0,0,,,,
training_logs/20260428_191936_metrics_report.md ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SkyRL Training Metrics Analysis
2
+
3
+ Generated from 2 log files
4
+
5
+ ## Overview
6
+
7
+ | Log File | Last Global Step | Metric Blocks | Final Reward (mean) | Final Reward (max) | Total Time (s) |
8
+ |----------|-------------|---------------|---------------------|-------------------|----------------|
9
+ | a2_rl_stack_php_v2_400876 | 9 | 9 | 0.9351 | 1.0000 | 39979.4 |
10
+ | a2_rl_stack_php_v2_400877 | 14 | 5 | 0.9539 | 0.9883 | 19667.8 |
11
+
12
+ ## Async Metrics
13
+
14
+ | | Mean | Std | Min | Max | Count |
15
+ |:------------------------------|-----------:|---------:|------:|---------:|--------:|
16
+ | async/discard_rate | 0 | 0 | 0 | 0 | 14 |
17
+ | async/discarded_count | 0 | 0 | 0 | 0 | 14 |
18
+ | async/effective_batch_groups | 64 | 0 | 64 | 64 | 14 |
19
+ | async/effective_batch_samples | 512 | 0 | 512 | 512 | 14 |
20
+ | async/staleness_max | 2.42857 | 1.82775 | 0 | 6 | 14 |
21
+ | async/staleness_mean | 2.1529 | 1.62522 | 0 | 4.5781 | 14 |
22
+ | async/staleness_min | 1.85714 | 1.40642 | 0 | 4 | 14 |
23
+ | async/staleness_ratio | 0.801343 | 0.397857 | 0 | 1 | 14 |
24
+
25
+ ## Generate Metrics
26
+
27
+ | | Mean | Std | Min | Max | Count |
28
+ |:-------------------------------------|----------:|---------:|--------:|---------:|--------:|
29
+ | generate/avg_num_tokens | 6035.33 | 1868.44 | 4562.2 | 10864.9 | 14 |
30
+ | generate/avg_tokens_non_zero_rewards | 6320.58 | 1751.66 | 4562.2 | 10819 | 14 |
31
+ | generate/avg_tokens_zero_rewards | 3137.92 | 5620.05 | 0 | 18654.7 | 14 |
32
+ | generate/max_num_tokens | 20070.1 | 5259.36 | 14905 | 31356 | 14 |
33
+ | generate/min_num_tokens | 194.571 | 492.313 | 1 | 1397 | 14 |
34
+ | generate/std_num_tokens | 3859.97 | 2033.17 | 2063.5 | 9146.15 | 14 |
35
+
36
+ ## Loss Metrics
37
+
38
+ | | Mean | Std | Min | Max | Count |
39
+ |:----------------------------|------------:|-----------:|--------:|-------:|--------:|
40
+ | loss/avg_final_rewards | 0.941829 | 0.0477826 | 0.8262 | 1 | 14 |
41
+ | loss/avg_raw_advantages | 0.000257143 | 0.00161327 | -0.0012 | 0.0054 | 14 |
42
+ | loss/avg_raw_advantages_abs | 0.00615 | 0.00679929 | 0 | 0.0202 | 14 |
43
+
44
+ ## Policy Metrics
45
+
46
+ | | Mean | Std | Min | Max | Count |
47
+ |:---------------------------|-------------:|------------:|--------:|-------:|--------:|
48
+ | policy/final_loss | -7.14286e-06 | 2.67261e-05 | -0.0001 | 0 | 14 |
49
+ | policy/policy_entropy | 0.117043 | 0.0137588 | 0.1061 | 0.1612 | 14 |
50
+ | policy/policy_loss | -0.000278571 | 0.00104232 | -0.0039 | 0 | 14 |
51
+ | policy/policy_lr | 0 | 0 | 0 | 0 | 14 |
52
+ | policy/policy_update_steps | 1 | 0 | 1 | 1 | 14 |
53
+ | policy/ppo_clip_ratio | 0 | 0 | 0 | 0 | 14 |
54
+ | policy/raw_grad_norm | 0.00200714 | 0.00198396 | 0 | 0.0057 | 14 |
55
+
56
+ ## Reward Metrics
57
+
58
+ | | Mean | Std | Min | Max | Count |
59
+ |:----------------------|---------:|-----------:|-------:|------:|--------:|
60
+ | reward/avg_pass_at_8 | 0.997771 | 0.00566493 | 0.9844 | 1 | 14 |
61
+ | reward/avg_raw_reward | 0.941829 | 0.0477826 | 0.8262 | 1 | 14 |
62
+
63
+ ## System Metrics
64
+
65
+ | | Mean | Std | Min | Max | Count |
66
+ |:------------------------|---------:|-------------:|---------:|---------:|--------:|
67
+ | system/process_rss_gb | 19.5656 | 4.8101 | 13.9041 | 26.5811 | 14 |
68
+ | system/process_vms_gb | 75.8183 | 0.446416 | 75.1672 | 76.6696 | 14 |
69
+ | system/ram_available_gb | 379.315 | 24.1565 | 358.4 | 428.185 | 14 |
70
+ | system/ram_percent | 55.7857 | 2.8038 | 50.1 | 58.2 | 14 |
71
+ | system/ram_total_gb | 857.969 | 2.35957e-13 | 857.969 | 857.969 | 14 |
72
+ | system/ram_used_gb | 478.654 | 24.1565 | 429.784 | 499.569 | 14 |
73
+
74
+ ## Timing Metrics
75
+
76
+ | | Mean | Std | Min | Max | Count |
77
+ |:--------------------------------------|-------------:|-------------:|----------:|----------:|--------:|
78
+ | timing/cleanup_old_checkpoints | 1.77528 | 3.87516 | 0.0131 | 13.0164 | 14 |
79
+ | timing/compute_advantages_and_returns | 0.0590643 | 0.0288508 | 0.0386 | 0.1382 | 14 |
80
+ | timing/convert_to_training_input | 3.80907 | 0.757823 | 3.1477 | 5.588 | 14 |
81
+ | timing/fwd_logprobs_values_reward | 100.054 | 48.5845 | 57.5811 | 221.339 | 14 |
82
+ | timing/policy_train | 793.518 | 231.644 | 610.491 | 1426.94 | 14 |
83
+ | timing/run_training | 893.959 | 279.927 | 668.409 | 1648.78 | 14 |
84
+ | timing/save_checkpoints | 33.6889 | 4.95858 | 27.6606 | 45.5888 | 14 |
85
+ | timing/step | 4260.52 | 2197.04 | 1944.63 | 8812.92 | 14 |
86
+ | timing/sync_weights | 50.0743 | 11.087 | 13.5134 | 64.0786 | 14 |
87
+ | timing/train_critic_and_policy | 793.846 | 231.673 | 610.784 | 1427.33 | 14 |
88
+ | timing/wait_for_generation_buffer | 3312.66 | 2225.38 | 1067 | 7944.76 | 14 |
89
+ | timing/save_hf_model | 39.8219 | 3.44474 | 37.3861 | 42.2577 | 2 |
90
+
91
+ ## Trainer Metrics
92
+
93
+ | | Mean | Std | Min | Max | Count |
94
+ |:--------------------|-------:|---------:|------:|------:|--------:|
95
+ | trainer/epoch | 0.5 | 0.518875 | 0 | 1 | 14 |
96
+ | trainer/global_step | 7.5 | 4.1833 | 1 | 14 | 14 |
97
+
98
+ ## Batch Errors Metrics
99
+
100
+ | | Mean | Std | Min | Max | Count |
101
+ |:------------------------------------------------|-----------:|-----------:|----------:|------------:|--------:|
102
+ | batch_errors/total_batches | 64.8571 | 22.8536 | 12 | 128 | 14 |
103
+ | batch_errors/total_instances | 518.857 | 182.829 | 96 | 1024 | 14 |
104
+ | batch_errors/total_successful | 489.357 | 157.845 | 96 | 896 | 14 |
105
+ | batch_errors/total_failed | 15.0714 | 15.2036 | 0 | 58 | 14 |
106
+ | batch_errors/total_masked | 29.3571 | 33.2487 | 0 | 126 | 14 |
107
+ | batch_errors/avg_DaytonaAuthenticationError | 0.015625 | nan | 0.015625 | 0.015625 | 1 |
108
+ | batch_errors/total_DaytonaAuthenticationError | 2 | nan | 2 | 2 | 1 |
109
+ | batch_errors/avg_AgentEnvironmentTimeoutError | 0.491477 | 0.297398 | 0.078125 | 0.984375 | 11 |
110
+ | batch_errors/total_AgentEnvironmentTimeoutError | 37.1818 | 33.4748 | 5 | 126 | 11 |
111
+ | batch_errors/avg_DaytonaError | 0.015625 | 0 | 0.015625 | 0.015625 | 2 |
112
+ | batch_errors/total_DaytonaError | 1 | 0 | 1 | 1 | 2 |
113
+
114
+ ## Training Progression by Log
115
+
116
+ ### a2_rl_stack_php_v2_400876
117
+
118
+ | Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
119
+ |------|--------|--------|-----|------|---------------|-------------|
120
+ | 1 | 0.9219 | 1.0000 | 0.000000 | -0.0001 | 8812.9 | 7944.8 |
121
+ | 2 | 0.8262 | 1.0000 | 0.000000 | 0.0000 | 3571.7 | 2729.8 |
122
+ | 3 | 0.8926 | 1.0000 | 0.000000 | 0.0000 | 3180.0 | 2346.9 |
123
+ | 4 | 0.9219 | 1.0000 | 0.000000 | 0.0000 | 3353.7 | 2556.4 |
124
+ | 5 | 0.9219 | 1.0000 | 0.000000 | -0.0000 | 3308.1 | 2412.4 |
125
+ | 6 | 0.9746 | 0.9844 | 0.000000 | 0.0000 | 2692.5 | 1760.9 |
126
+ | 7 | 0.9941 | 1.0000 | 0.000000 | -0.0000 | 4929.0 | 3261.1 |
127
+ | 8 | 1.0000 | 1.0000 | 0.000000 | 0.0000 | 7714.9 | 6992.0 |
128
+ | 9 | 0.9629 | 1.0000 | 0.000000 | -0.0000 | 2416.6 | 1532.0 |
129
+
130
+ ### a2_rl_stack_php_v2_400877
131
+
132
+ | Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
133
+ |------|--------|--------|-----|------|---------------|-------------|
134
+ | 10 | 0.9883 | 1.0000 | 0.000000 | 0.0000 | 7720.8 | 6894.3 |
135
+ | 11 | 0.9082 | 1.0000 | 0.000000 | 0.0000 | 1944.6 | 1067.0 |
136
+ | 12 | 0.9316 | 1.0000 | 0.000000 | 0.0000 | 3377.2 | 2552.8 |
137
+ | 13 | 0.9707 | 1.0000 | 0.000000 | -0.0000 | 2811.1 | 1989.9 |
138
+ | 14 | 0.9707 | 0.9844 | 0.000000 | -0.0000 | 3814.0 | 2337.1 |
139
+
140
+ ## Timing Analysis
141
+
142
+ ### Average Time Breakdown (% of step time; nested components overlap, so rows do not sum to 100%)
143
+
144
+ | Component | Avg % of Step Time |
145
+ |-----------|-------------------|
146
+ | wait_for_generation_buffer | 73.4% |
147
+ | run_training | 25.1% |
148
+ | train_critic_and_policy | 22.3% |
149
+ | policy_train | 22.3% |
150
+ | fwd_logprobs_values_reward | 2.8% |
151
+ | sync_weights | 1.4% |
152
+ | save_checkpoints | 0.9% |
153
+ | save_hf_model | 0.8% |
154
+ | convert_to_training_input | 0.1% |
155
+ | cleanup_old_checkpoints | 0.0% |
156
+ | compute_advantages_and_returns | 0.0% |
157
+
158
+ ## Cross-Log Comparison
159
+
160
+ | Log | Avg Reward | Pass@8 | Step Time (s) | Gen Wait Time (s) | Avg Tokens | Staleness |
161
+ |-----|------|------|------|------|------|------|
162
+ | a2_rl_stack_php_v2_400876 | 0.9351 | 0.9983 | 4442.1608 | 3504.0121 | 5973.0260 | 2.1892 |
163
+ | a2_rl_stack_php_v2_400877 | 0.9539 | 0.9969 | 3933.5575 | 2968.2175 | 6147.4898 | 2.0875 |
164
+
165
+ ## vLLM Inference Engine Analysis
166
+
167
+ Metrics from vLLM stat loggers (V1LoggingStatLoggerFixed).
168
+
169
+ > **Note**: Ray deduplicates similar log messages with `[repeated Nx across cluster]`,
170
+ > so we typically capture stats from one engine per timestamp. The stats shown are
171
+ > **per-engine** values. Multiply them by `num_inference_engines` to estimate cluster-wide totals.
172
+
173
+ ### Summary by Log (Per-Engine Stats)
174
+
175
+ | Log | Avg Running/Engine | Avg Waiting/Engine | Avg Gen Throughput/Engine | Avg KV Cache % | Avg Prefix Hit % |
176
+ |-----|-------------------|-------------------|--------------------------|----------------|------------------|
177
+ | a2_rl_stack_php_v2_400876 | 4.5 | 0.0 | 92.5 tok/s | 8.2% | 85.3% |
178
+ | a2_rl_stack_php_v2_400877 | 4.3 | 0.0 | 87.0 tok/s | 8.6% | 86.3% |
179
+
180
+ ### Utilization Analysis (Per-Engine)
181
+
182
+ Key indicators of inference engine utilization:
183
+
184
+ - **Running requests/engine**: Concurrent requests being processed by each engine
185
+ - **Waiting requests**: Requests queued (0 = engine not saturated, has spare capacity)
186
+ - **Generation throughput**: Decode tokens/sec per engine
187
+   - An 8B model on an H100 can reach **1000+ tok/s** when saturated
188
+   - If you see <300 tok/s with 0 waiting requests, the engine is **starved for requests**
189
+
190
+ #### a2_rl_stack_php_v2_400876
191
+
192
+ - **Running requests/engine**: avg=4.5, max=16
193
+ - **Waiting requests**: avg=0.0, max=0
194
+ - **Generation throughput/engine**: avg=92.5 tok/s, max=348.4 tok/s
195
+ - **KV cache usage**: avg=8.2%
196
+ - **Prefix cache hit rate**: avg=85.3%
197
+ - ⚠️ **Underutilized**: Engines starved for requests (0 waiting, avg 4.5 running)
198
+ - Bottleneck is likely upstream (environment execution, not inference)
199
+
200
+ #### a2_rl_stack_php_v2_400877
201
+
202
+ - **Running requests/engine**: avg=4.3, max=15
203
+ - **Waiting requests**: avg=0.0, max=0
204
+ - **Generation throughput/engine**: avg=87.0 tok/s, max=307.0 tok/s
205
+ - **KV cache usage**: avg=8.6%
206
+ - **Prefix cache hit rate**: avg=86.3%
207
+ - ⚠️ **Underutilized**: Engines starved for requests (0 waiting, avg 4.3 running)
208
+ - Bottleneck is likely upstream (environment execution, not inference)
209
+
210
+ ## Trial-Level Analysis (from result.json)
211
+
212
+ Total trials parsed: 8273
213
+
214
+ ### Turn Count Statistics
215
+
216
+ | Metric | Value |
217
+ |--------|-------|
218
+ | Mean | 3.8 |
219
+ | Median | 3.0 |
220
+ | Std | 3.1 |
221
+ | Min | 1 |
222
+ | Max | 62 |
223
+ | Count | 8273 |
224
+
225
+ ### Exception Distribution
226
+
227
+ | Exception Type | Count | % |
228
+ |---------------|-------|---|
229
+ | No exception | 6920 | 83.6% |
230
+ | AgentTimeoutError | 733 | 8.9% |
231
+ | AgentEnvironmentTimeoutError | 410 | 5.0% |
232
+ | ContextLengthExceededError | 147 | 1.8% |
233
+ | VerifierRuntimeError | 58 | 0.7% |
234
+ | DaytonaError | 2 | 0.0% |
235
+ | DaytonaAuthenticationError | 2 | 0.0% |
236
+ | InternalServerError | 1 | 0.0% |
237
+
238
+ ### Turn Count by Exception Type
239
+
240
+ | Exception Type | Mean Turns | Median Turns | Count |
241
+ |---------------|-----------|-------------|-------|
242
+ | InternalServerError | 22.0 | 22.0 | 1 |
243
+ | ContextLengthExceededError | 20.9 | 20.0 | 147 |
244
+ | AgentTimeoutError | 7.0 | 6.0 | 733 |
245
+ | AgentEnvironmentTimeoutError | 4.9 | 5.0 | 410 |
246
+ | VerifierRuntimeError | 4.8 | 5.0 | 58 |
247
+ | No exception | 3.0 | 3.0 | 6920 |
248
+ | DaytonaError | 2.0 | 2.0 | 2 |
249
+ | DaytonaAuthenticationError | 1.0 | 1.0 | 2 |
250
+
251
+ ### Turn Count by Outcome
252
+
253
+ | Outcome | Mean Turns | Median Turns | Count |
254
+ |---------|-----------|-------------|-------|
255
+ | Success | 3.8 | 3.0 | 8162 |
256
+ | Failure | 7.9 | 7.0 | 47 |
257
+
258
+ ### Reward Summary
259
+
260
+ - Mean reward: 0.9943
261
+ - Success rate: 99.4%
262
+ - Trials with reward data: 8209
263
+
training_logs/20260428_191936_metrics_table.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ log_file,async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_DaytonaAuthenticationError,batch_errors/total_DaytonaAuthenticationError,batch_errors/avg_AgentEnvironmentTimeoutError,batch_errors/total_AgentEnvironmentTimeoutError,timing/save_hf_model,batch_errors/avg_DaytonaError,batch_errors/total_DaytonaError,global_step
2
+ a2_rl_stack_php_v2_400876,0.0,0,64,512,0,0.0,0,0.0,5173.5488,5581.1377,364.0,16498,1,3153.1045,0.9219,0.0054,0.0054,-0.0001,0.1612,-0.0039,0.0,1.0,0.0,0.0019,1.0,0.9219,13.9041,75.2886,363.3583,57.6,857.9687,494.6104,13.0164,0.1382,3.3894,86.136,726.5471,813.1269,45.5888,8812.9184,51.6468,726.8524,7944.7551,0,1,128,1024,896,58,126,0.015625,2.0,0.984375,126.0,,,,1
3
+ a2_rl_stack_php_v2_400876,0.0,0,64,512,1,1.0,1,1.0,4930.5762,5967.7683,1.0,17443,1,3568.3347,0.8262,0.0,0.0,0.0,0.1245,0.0,0.0,1.0,0.0,0.0,1.0,0.8262,15.6279,75.6176,360.3593,58.0,857.9687,497.6094,0.0131,0.0414,3.5316,74.6536,709.0029,784.0235,27.6606,3571.7276,54.3544,709.3282,2729.8153,0,2,64,512,459,21,53,,,0.828125,53.0,,,,2
4
+ a2_rl_stack_php_v2_400876,0.0,0,64,512,2,2.0,2,1.0,5124.7324,5696.0591,377.5273,15485,1,3123.3346,0.8926,0.0,0.0,0.0,0.1246,0.0,0.0,1.0,0.0,0.0,1.0,0.8926,15.9937,75.74,358.4,58.2,857.9687,499.5687,0.3129,0.0457,3.1477,83.4744,695.0441,778.8675,28.7517,3180.0156,51.0655,695.3471,2346.9327,0,3,64,512,472,20,40,,,0.625,40.0,,,,3
5
+ a2_rl_stack_php_v2_400876,0.0,0,64,512,3,2.9844,2,1.0,5176.248,5614.8284,1.0,16890,1,2922.3081,0.9219,0.0,0.0,0.0,0.115,0.0,0.0,1.0,0.0,0.0,1.0,0.9219,16.2626,75.8032,361.1512,57.9,857.9687,496.8175,0.401,0.0417,3.3777,74.1559,668.7475,743.2802,28.5269,3353.6926,50.6787,669.0823,2556.3534,0,4,64,512,473,21,39,,,0.609375,39.0,,,,4
6
+ a2_rl_stack_php_v2_400876,0.0,0,64,512,4,3.9375,3,1.0,5691.5176,6149.7648,284.2,26038,1,3437.1652,0.9219,-0.0008,0.0069,-0.0,0.1112,-0.0,0.0,1.0,0.0,0.0032,1.0,0.9219,21.1017,75.9924,362.8714,57.7,857.9687,495.0973,0.3297,0.06,4.5482,86.0846,752.4833,838.9829,28.3162,3308.0503,52.1042,752.838,2412.3879,0,5,64,512,505,5,7,,,0.09375,6.0,37.3861,0.015625,1.0,5
7
+ a2_rl_stack_php_v2_400876,0.0,0,64,512,5,4.2031,3,1.0,5741.5156,5720.9058,6532.6154,20082,1,3555.0253,0.9746,0.0,0.0,0.0,0.1169,0.0,0.0,1.0,0.0,0.0,0.9844,0.9746,21.72,75.7697,361.731,57.8,857.9687,496.2377,0.2795,0.0506,3.6853,98.4083,777.2451,876.008,32.4554,2692.5491,51.9383,777.5488,1760.8846,0,6,64,512,512,0,0,,,,,,,,6
8
+ a2_rl_stack_php_v2_400876,0.0,0,64,512,6,4.5781,4,1.0,10864.9395,10819.0275,18654.6667,31356,1397,9146.1479,0.9941,0.0009,0.0152,-0.0,0.1127,-0.0,0.0,1.0,0.0,0.0038,1.0,0.9941,26.2239,76.3718,428.1846,50.1,857.9687,429.7841,0.3285,0.1094,5.588,221.3386,1426.9392,1648.7818,33.3086,4928.9777,13.5134,1427.3335,3261.0614,0,7,64,512,512,0,0,,,,,,,,7
9
+ a2_rl_stack_php_v2_400876,0.0,0,64,512,0,0.0,0,0.0,4562.1953,4562.1953,0.0,16694,1315,2063.5003,1.0,0.0,0.0,0.0,0.1111,0.0,0.0,1.0,0.0,0.0,1.0,1.0,26.1423,76.4117,412.7117,51.9,857.9687,445.257,0.3459,0.0436,3.2475,57.5811,610.4907,668.4088,36.6974,7714.896,51.2285,610.7838,6991.9583,1,8,64,512,494,13,18,,,0.28125,18.0,,,,8
10
+ a2_rl_stack_php_v2_400876,0.0,0,64,512,1,1.0,1,1.0,6491.9609,6718.9148,603.1053,18269,1,3405.058,0.9629,0.0001,0.007,-0.0,0.1099,-0.0,0.0,1.0,0.0,0.0038,1.0,0.9629,26.5811,76.6696,407.6129,52.5,857.9687,450.3558,0.3266,0.0501,3.7625,93.6707,733.3147,827.3766,33.7296,2416.6198,53.5177,733.6554,1531.9602,1,9,64,512,479,20,33,,,0.515625,33.0,,,,9
11
+ a2_rl_stack_php_v2_400877,0.0,0,64,512,2,0.4375,0,0.2188,4768.9824,4805.7866,1665.1667,18141,1,2456.2799,0.9883,-0.0007,0.0075,0.0,0.1119,0.0,0.0,1.0,0.0,0.0033,1.0,0.9883,15.2558,75.1672,370.8639,56.8,857.9687,487.1048,8.2923,0.0521,3.272,74.7888,683.9612,759.0926,40.7566,7720.7893,64.0786,684.2511,6894.3459,1,10,64,512,470,22,42,,,0.65625,42.0,42.2577,,,10
12
+ a2_rl_stack_php_v2_400877,0.0,0,64,512,1,1.0,1,1.0,6043.1543,6529.5032,1231.4043,14905,1,3379.2364,0.9082,0.001,0.0085,0.0,0.1094,0.0,0.0,1.0,0.0,0.0039,1.0,0.9082,15.4861,75.2612,369.6674,56.9,857.9687,488.3013,0.2726,0.0386,3.2801,86.1438,735.4492,822.0201,33.6901,1944.634,52.3307,735.837,1066.9959,1,11,64,512,477,20,35,,,0.546875,35.0,,,,11
13
+ a2_rl_stack_php_v2_400877,0.0,0,64,512,2,2.0,2,1.0,5200.8203,5582.3585,1.0,18761,1,3026.5222,0.9316,0.0,0.0,0.0,0.1061,0.0,0.0,1.0,0.0,0.0,1.0,0.9316,15.956,75.5988,372.0403,56.6,857.9687,485.9283,0.315,0.0443,3.8178,83.3515,683.5771,767.262,34.4038,3377.1913,53.2947,683.8658,2552.7951,1,12,64,512,500,6,12,,,0.1875,12.0,,,,12
14
+ a2_rl_stack_php_v2_400877,0.0,0,64,512,3,3.0,3,1.0,5112.4043,5205.6962,2021.3333,20402,1,2908.5829,0.9707,-0.0012,0.0154,-0.0,0.1116,-0.0,0.0,1.0,0.0,0.0057,1.0,0.9707,18.1414,75.7131,370.1971,56.9,857.9687,487.7716,0.3715,0.0475,3.4987,78.1533,686.8594,765.3441,34.4024,2811.1464,52.3944,687.143,1989.8831,1,13,64,512,506,5,6,,,0.078125,5.0,,0.015625,1.0,13
15
+ a2_rl_stack_php_v2_400877,0.0,0,64,512,4,4.0,4,1.0,9612.0879,9534.167,12193.8667,30017,1,7895.0271,0.9707,-0.0011,0.0202,-0.0,0.1125,-0.0,0.0,1.0,0.0,0.0025,0.9844,0.9707,25.5219,76.0509,411.2549,52.1,857.9687,446.7137,0.2489,0.0637,5.1805,202.8086,1219.5955,1422.8569,33.357,3814.0265,48.8945,1219.9843,2337.0674,1,14,12,96,96,0,0,,,,,,,,14
training_logs/20260428_191936_reward_vs_steps.png ADDED

Git LFS Details

  • SHA256: 603e4ed68d969a7548285d2dcd397f4c81cfca86b924804013352d3c6db7161d
  • Pointer size: 131 Bytes
  • Size of remote file: 148 kB
training_logs/20260428_191936_trial_results.csv ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/20260428_191936_turn_count_distribution.png ADDED

Git LFS Details

  • SHA256: b15aff8845abc0bb80079e674eb4570e02e14bb2313db8907a64e5866f09f83e
  • Pointer size: 131 Bytes
  • Size of remote file: 121 kB
training_logs/20260428_191936_vllm_metrics_a2_rl_stack_php_v2_400876.csv ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/20260428_191936_vllm_metrics_a2_rl_stack_php_v2_400877.csv ADDED
The diff for this file is too large to render. See raw diff
 
training_logs/20260428_191936_vllm_metrics_table.csv ADDED
The diff for this file is too large to render. See raw diff