Add parsed training metrics and plots

Browse files

Files changed (11) hide show

.gitattributes +2 -0
training_logs/20260429_184357_metrics_a2_rl_unitsyn_python_v3_402161.csv +13 -0
training_logs/20260429_184357_metrics_a2_rl_unitsyn_python_v3_402162.csv +3 -0
training_logs/20260429_184357_metrics_report.md +265 -0
training_logs/20260429_184357_metrics_table.csv +15 -0
training_logs/20260429_184357_reward_vs_steps.png +3 -0
training_logs/20260429_184357_trial_results.csv +0 -0
training_logs/20260429_184357_turn_count_distribution.png +3 -0
training_logs/20260429_184357_vllm_metrics_a2_rl_unitsyn_python_v3_402161.csv +0 -0
training_logs/20260429_184357_vllm_metrics_a2_rl_unitsyn_python_v3_402162.csv +0 -0
training_logs/20260429_184357_vllm_metrics_table.csv +0 -0

.gitattributes CHANGED Viewed

@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text

 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
+training_logs/20260429_184357_reward_vs_steps.png filter=lfs diff=lfs merge=lfs -text
+training_logs/20260429_184357_turn_count_distribution.png filter=lfs diff=lfs merge=lfs -text

training_logs/20260429_184357_metrics_a2_rl_unitsyn_python_v3_402161.csv ADDED Viewed

	@@ -0,0 +1,13 @@

+async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_DaytonaAuthenticationError,batch_errors/total_DaytonaAuthenticationError,batch_errors/avg_AgentEnvironmentTimeoutError,batch_errors/total_AgentEnvironmentTimeoutError,batch_errors/avg_VerifierRuntimeError,batch_errors/total_VerifierRuntimeError,batch_errors/avg_VerifierTimeoutError,batch_errors/total_VerifierTimeoutError,timing/save_hf_model,batch_errors/avg_DaytonaError,batch_errors/total_DaytonaError
+0.0,0,64,512,0,0.0,0,0.0,5121.6914,4576.6295,5769.2436,13905,1499,2503.5505,0.543,0.0021,0.1803,0.0,0.219,0.0,0.0,1.0,0.0,0.0323,0.6719,0.543,11.793,75.1959,379.3328,55.8,857.9687,478.6359,10.9023,0.1146,2.5571,72.0045,656.5065,728.9238,44.3353,6093.5913,52.8301,656.8044,5309.2801,0,1,128,1024,994,14,18,0.0859375,11.0,0.140625,18.0,0.0078125,1.0,,,,,
+0.0,0,64,512,1,1.0,1,1.0,5871.2051,5096.5576,6791.5128,15067,1,3402.3282,0.543,-0.0001,0.1554,-0.0002,0.2129,-0.0057,0.0,1.0,0.0,0.0274,0.6875,0.543,12.9813,75.3753,375.7304,56.2,857.9687,482.2383,0.0121,0.0559,2.7428,72.8696,707.4558,780.6952,29.1329,2730.7674,52.7333,707.7694,1894.5939,0,2,64,512,464,19,44,0.046875,3.0,0.6875,44.0,,,0.015625,1.0,,,
+0.0,0,64,512,2,2.0,2,1.0,5706.7969,4967.944,6318.9893,14787,1,3398.0642,0.4531,-0.0022,0.1487,0.0,0.198,0.0,0.0,1.0,0.0,0.0284,0.625,0.4531,13.4866,75.6917,379.7476,55.7,857.9687,478.2211,0.3416,0.0392,2.6589,73.1319,687.2284,760.7086,29.5563,2618.6945,52.8218,687.5371,1802.5023,0,3,64,512,491,9,20,0.015625,1.0,0.3125,20.0,,,,,,,
+0.0,0,64,512,3,3.0,3,1.0,5825.2578,4940.4704,6689.5483,14273,1,3209.1042,0.4941,-0.0017,0.0659,0.0,0.2102,0.0,0.0,1.0,0.0,0.0177,0.5312,0.4941,13.7198,75.7789,383.0292,55.4,857.9687,474.9395,0.3775,0.038,2.6718,77.6872,669.8871,747.9511,28.8753,2255.3097,53.5368,670.2257,1451.1474,0,4,64,512,456,20,40,0.09375,6.0,0.625,40.0,0.03125,2.0,0.125,8.0,,,
+0.0,0,64,512,4,3.75,3,1.0,5520.8398,5594.9188,5458.4856,16082,1,3484.2247,0.457,0.0055,0.1648,-0.0002,0.2057,-0.0064,0.0,1.0,0.0,0.0214,0.6094,0.457,14.6653,76.0574,385.3959,55.1,857.9687,472.5728,0.3196,0.044,2.9597,73.5587,681.9984,755.9482,29.4618,3542.6439,52.7498,682.3452,2730.9604,0,5,64,512,464,14,42,0.09375,6.0,0.65625,42.0,,,,,37.5898,,
+0.0,0,64,512,5,4.125,3,1.0,5461.6738,5218.1189,5769.8894,14552,1,3328.2592,0.5586,0.014,0.1023,-0.0002,0.1969,-0.0076,0.0,1.0,0.0,0.0179,0.7031,0.5586,15.0607,76.0109,385.4834,55.1,857.9687,472.4853,0.366,0.0393,2.7127,68.8947,678.4589,747.7171,29.9495,2213.735,54.8743,678.7827,1408.4036,0,6,64,512,474,15,28,0.125,8.0,0.4375,28.0,,,0.03125,2.0,,,
+0.0,0,64,512,6,4.6562,4,1.0,7798.1641,7124.3027,8179.4006,20083,1,3802.9833,0.3613,0.0039,0.1358,-0.0001,0.2261,-0.004,0.0,1.0,0.0,0.0185,0.5312,0.3613,18.1326,76.0253,380.9768,55.6,857.9687,476.9919,0.6987,0.0516,3.5227,93.4902,731.6962,825.5737,29.3423,4983.416,13.7041,732.0316,4140.5883,0,7,64,512,512,0,0,,,,,,,,,,,
+0.0,0,64,512,0,0.0,0,0.0,4865.5312,4540.8412,5507.3605,14128,1386,2616.8319,0.6641,-0.0047,0.0958,-0.0,0.206,-0.0,0.0,1.0,0.0,0.0163,0.7188,0.6641,18.3456,76.1459,384.4786,55.2,857.9687,473.4901,0.3173,0.0353,2.9251,73.3734,646.7222,720.5432,29.7617,5669.0399,52.8866,647.1342,4892.6495,1,8,64,512,470,16,39,0.046875,3.0,0.609375,39.0,,,,,,,
+0.0,0,64,512,1,1.0,1,1.0,5855.5039,5276.4774,6594.0844,13894,1,3271.3689,0.5605,0.0007,0.0512,-0.0,0.2096,-0.0006,0.0,1.0,0.0,0.0219,0.6406,0.5605,18.6552,76.4328,386.8148,54.9,857.9687,471.1539,0.3384,0.0375,2.7372,78.6677,660.2909,739.3591,30.4714,3239.5269,52.9167,660.6535,2444.512,1,9,64,512,432,25,70,0.125,8.0,1.078125,69.0,,,0.03125,2.0,,0.015625,1.0
+0.0,0,64,512,2,2.0,2,1.0,5375.2695,5344.5093,5397.7162,14823,1,3872.2484,0.4219,0.0135,0.113,-0.0002,0.1788,-0.007,0.0,1.0,0.0,0.0154,0.5469,0.4219,18.8019,76.4187,385.8892,55.0,857.9687,472.0795,0.3767,0.0413,2.7467,73.8308,700.3324,774.57,29.1637,3338.1958,53.0117,700.6976,2507.8644,1,10,64,512,463,23,37,0.140625,9.0,0.578125,37.0,,,0.046875,3.0,36.7892,,
+0.0,0,64,512,3,3.0,3,1.0,5192.8828,4679.4904,5726.7291,15138,1,3311.2403,0.5098,0.0084,0.0758,-0.0002,0.1873,-0.0056,0.0,1.0,0.0,0.0164,0.5625,0.5098,18.9517,76.4518,383.7386,55.3,857.9687,474.23,0.3045,0.0384,2.7622,74.4343,676.0857,750.8928,28.7197,2321.6045,57.2462,676.4197,1510.6996,1,11,64,512,437,26,61,0.140625,9.0,0.953125,61.0,,,0.078125,5.0,,,
+0.0,0,64,512,4,3.7969,3,1.0,5528.9668,5645.4653,5422.0674,16073,1,3727.4604,0.4785,0.0021,0.1344,-0.0001,0.1796,-0.0038,0.0,1.0,0.0,0.0198,0.6094,0.4785,19.0433,76.3939,383.2556,55.3,857.9687,474.7131,0.3795,0.0401,3.2865,78.4764,697.4553,776.3111,28.7288,2932.4204,52.7552,697.7943,2100.0414,1,12,16,128,110,8,15,0.0625,1.0,0.9375,15.0,,,0.125,2.0,,,

training_logs/20260429_184357_metrics_a2_rl_unitsyn_python_v3_402162.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_DaytonaAuthenticationError,batch_errors/total_DaytonaAuthenticationError,batch_errors/avg_AgentEnvironmentTimeoutError,batch_errors/total_AgentEnvironmentTimeoutError,batch_errors/avg_VerifierTimeoutError,batch_errors/total_VerifierTimeoutError
+0.0,0,64,512,5,1.1406,0,0.2344,5434.5586,5246.1829,5770.3587,13053,1,3046.1974,0.6406,-0.003,0.059,0.0,0.189,0.0,0.0,1.0,0.0,0.0114,0.7308,0.6406,11.8048,75.1254,372.3459,56.6,857.9687,485.6227,12.7402,0.0478,2.4902,73.844,683.1484,757.335,46.1195,8330.7966,51.8296,683.4422,7519.1416,1,13,65,520,426,31,55,0.5846153846153846,38,0.8461538461538461,55.0,0.015384615384615385,1.0
+0.0,0,64,512,1,1.0,1,1.0,6345.9297,6347.6109,6344.6529,18003,1,4261.8702,0.4316,0.0217,0.1127,-0.0005,0.1793,-0.0156,0.0,1.0,0.0,0.0191,0.5625,0.4316,15.5447,75.3049,399.4698,53.4,857.9687,458.4989,0.3343,0.0742,3.1141,89.1038,721.9162,811.4036,33.5523,871.8394,57.2936,722.2251,0.0049,1,14,11,88,84,4,0,0.36363636363636365,4,,,,

training_logs/20260429_184357_metrics_report.md ADDED Viewed

	@@ -0,0 +1,265 @@

+# SkyRL Training Metrics Analysis
+Generated from 2 log files
+## Overview
+| Log File | Last Global Step | Metric Blocks | Final Reward (mean) | Final Reward (max) | Total Time (s) |
+|----------|-------------|---------------|---------------------|-------------------|----------------|
+| a2_rl_unitsyn_python_v3_402161 | 12 | 12 | 0.5037 | 0.6641 | 41938.9 |
+| a2_rl_unitsyn_python_v3_402162 | 14 | 2 | 0.5361 | 0.6406 | 9202.6 |
+## Async Metrics
+|                               |       Mean |      Std |   Min |      Max |   Count |
+|:------------------------------|-----------:|---------:|------:|---------:|--------:|
+| async/discard_rate            |   0        | 0        |     0 |   0      |      14 |
+| async/discarded_count         |   0        | 0        |     0 |   0      |      14 |
+| async/effective_batch_groups  |  64        | 0        |    64 |  64      |      14 |
+| async/effective_batch_samples | 512        | 0        |   512 | 512      |      14 |
+| async/staleness_max           |   2.64286  | 1.94569  |     0 |   6      |      14 |
+| async/staleness_mean          |   2.17634  | 1.54834  |     0 |   4.6562 |      14 |
+| async/staleness_min           |   1.85714  | 1.35062  |     0 |   4      |      14 |
+| async/staleness_ratio         |   0.802457 | 0.396118 |     0 |   1      |      14 |
+## Generate Metrics
+|                                      |      Mean |      Std |      Min |      Max |   Count |
+|:-------------------------------------|----------:|---------:|---------:|---------:|--------:|
+| generate/avg_num_tokens              |  5707.45  |  705.472 |  4865.53 |  7798.16 |      14 |
+| generate/avg_tokens_non_zero_rewards |  5328.54  |  701.545 |  4540.84 |  7124.3  |      14 |
+| generate/avg_tokens_zero_rewards     |  6124.29  |  768.378 |  5397.72 |  8179.4  |      14 |
+| generate/max_num_tokens              | 15275.8   | 1837.96  | 13053    | 20083    |      14 |
+| generate/min_num_tokens              |   206.929 |  523.93  |     1    |  1499    |      14 |
+| generate/std_num_tokens              |  3373.98  |  467.502 |  2503.55 |  4261.87 |      14 |
+## Loss Metrics
+|                             |     Mean |        Std |     Min |    Max |   Count |
+|:----------------------------|---------:|-----------:|--------:|-------:|--------:|
+| loss/avg_final_rewards      | 0.508364 | 0.083743   |  0.3613 | 0.6641 |      14 |
+| loss/avg_raw_advantages     | 0.0043   | 0.00761799 | -0.0047 | 0.0217 |      14 |
+| loss/avg_raw_advantages_abs | 0.113936 | 0.041136   |  0.0512 | 0.1803 |      14 |
+## Policy Metrics
+|                            |         Mean |         Std |     Min |    Max |   Count |
+|:---------------------------|-------------:|------------:|--------:|-------:|--------:|
+| policy/final_loss          | -0.000121429 | 0.000142389 | -0.0005 | 0      |      14 |
+| policy/policy_entropy      |  0.199886    | 0.0153501   |  0.1788 | 0.2261 |      14 |
+| policy/policy_loss         | -0.00402143  | 0.00446631  | -0.0156 | 0      |      14 |
+| policy/policy_lr           |  0           | 0           |  0      | 0      |      14 |
+| policy/policy_update_steps |  1           | 0           |  1      | 1      |      14 |
+| policy/ppo_clip_ratio      |  0           | 0           |  0      | 0      |      14 |
+| policy/raw_grad_norm       |  0.0202786   | 0.00565457  |  0.0114 | 0.0323 |      14 |
+## Reward Metrics
+|                       |     Mean |       Std |    Min |    Max |   Count |
+|:----------------------|---------:|----------:|-------:|-------:|--------:|
+| reward/avg_pass_at_8  | 0.623629 | 0.0703892 | 0.5312 | 0.7308 |      14 |
+| reward/avg_raw_reward | 0.508364 | 0.083743  | 0.3613 | 0.6641 |      14 |
+## System Metrics
+|                         |     Mean |         Std |      Min |      Max |   Count |
+|:------------------------|---------:|------------:|---------:|---------:|--------:|
+| system/process_rss_gb   |  15.7847 | 2.78836     |  11.793  |  19.0433 |      14 |
+| system/process_vms_gb   |  75.8863 | 0.480004    |  75.1254 |  76.4518 |      14 |
+| system/ram_available_gb | 383.263  | 6.21639     | 372.346  | 399.47   |      14 |
+| system/ram_percent      |  55.3286 | 0.731099    |  53.4    |  56.6    |      14 |
+| system/ram_total_gb     | 857.969  | 2.35957e-13 | 857.969  | 857.969  |      14 |
+| system/ram_used_gb      | 474.705  | 6.21637     | 458.499  | 485.623  |      14 |
+## Timing Metrics
+|                                       |       Mean |          Std |      Min |       Max |   Count |
+|:--------------------------------------|-----------:|-------------:|---------:|----------:|--------:|
+| timing/cleanup_old_checkpoints        |    1.98634 |    4.18445   |   0.0121 |   12.7402 |      14 |
+| timing/compute_advantages_and_returns |    0.0498  |    0.0212895 |   0.0353 |    0.1146 |      14 |
+| timing/convert_to_training_input      |    2.84912 |    0.287621  |   2.4902 |    3.5227 |      14 |
+| timing/fwd_logprobs_values_reward     |   76.6691  |    6.77429   |  68.8947 |   93.4902 |      14 |
+| timing/policy_train                   |  685.656   |   24.3175    | 646.722  |  731.696  |      14 |
+| timing/run_training                   |  762.709   |   29.2176    | 720.543  |  825.574  |      14 |
+| timing/save_checkpoints               |   31.9408  |    5.76727   |  28.7197 |   46.1195 |      14 |
+| timing/step                           | 3652.97    | 1959.73      | 871.839  | 8330.8    |      14 |
+| timing/sync_weights                   |   50.7993  |   10.807     |  13.7041 |   57.2936 |      14 |
+| timing/train_critic_and_policy        |  685.99    |   24.3066    | 647.134  |  732.032  |      14 |
+| timing/wait_for_generation_buffer     | 2836.6     | 1971.02      |   0.0049 | 7519.14   |      14 |
+| timing/save_hf_model                  |   37.1895  |    0.56611   |  36.7892 |   37.5898 |       2 |
+## Trainer Metrics
+|                     |   Mean |      Std |   Min |   Max |   Count |
+|:--------------------|-------:|---------:|------:|------:|--------:|
+| trainer/epoch       |    0.5 | 0.518875 |     0 |     1 |      14 |
+| trainer/global_step |    7.5 | 4.1833   |     1 |    14 |      14 |
+## Batch Errors Metrics
+|                                                 |        Mean |         Std |        Min |         Max |   Count |
+|:------------------------------------------------|------------:|------------:|-----------:|------------:|--------:|
+| batch_errors/total_batches                      |  61.4286    |  26.4828    | 11         |  128        |      14 |
+| batch_errors/total_instances                    | 491.429     | 211.863     | 88         | 1024        |      14 |
+| batch_errors/total_successful                   | 448.357     | 206.364     | 84         |  994        |      14 |
+| batch_errors/total_failed                       |  16         |   8.77935   |  0         |   31        |      14 |
+| batch_errors/total_masked                       |  33.5       |  21.2268    |  0         |   70        |      14 |
+| batch_errors/avg_DaytonaAuthenticationError     |   0.148063  |   0.156344  |  0.015625  |    0.584615 |      13 |
+| batch_errors/total_DaytonaAuthenticationError   |   8.23077   |   9.49696   |  1         |   38        |      13 |
+| batch_errors/avg_AgentEnvironmentTimeoutError   |   0.655148  |   0.273596  |  0.140625  |    1.07812  |      12 |
+| batch_errors/total_AgentEnvironmentTimeoutError |  39         |  16.9973    | 15         |   69        |      12 |
+| batch_errors/avg_VerifierRuntimeError           |   0.0195312 |   0.0165728 |  0.0078125 |    0.03125  |       2 |
+| batch_errors/total_VerifierRuntimeError         |   1.5       |   0.707107  |  1         |    2        |       2 |
+| batch_errors/avg_VerifierTimeoutError           |   0.0585637 |   0.0455868 |  0.0153846 |    0.125    |       8 |
+| batch_errors/total_VerifierTimeoutError         |   3         |   2.39046   |  1         |    8        |       8 |
+| batch_errors/avg_DaytonaError                   |   0.015625  | nan         |  0.015625  |    0.015625 |       1 |
+| batch_errors/total_DaytonaError                 |   1         | nan         |  1         |    1        |       1 |
+## Training Progression by Log
+### a2_rl_unitsyn_python_v3_402161
+| Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
+|------|--------|--------|-----|------|---------------|-------------|
+| 1 | 0.5430 | 0.6719 | 0.000000 | 0.0000 | 6093.6 | 5309.3 |
+| 2 | 0.5430 | 0.6875 | 0.000000 | -0.0002 | 2730.8 | 1894.6 |
+| 3 | 0.4531 | 0.6250 | 0.000000 | 0.0000 | 2618.7 | 1802.5 |
+| 4 | 0.4941 | 0.5312 | 0.000000 | 0.0000 | 2255.3 | 1451.1 |
+| 5 | 0.4570 | 0.6094 | 0.000000 | -0.0002 | 3542.6 | 2731.0 |
+| 6 | 0.5586 | 0.7031 | 0.000000 | -0.0002 | 2213.7 | 1408.4 |
+| 7 | 0.3613 | 0.5312 | 0.000000 | -0.0001 | 4983.4 | 4140.6 |
+| 8 | 0.6641 | 0.7188 | 0.000000 | -0.0000 | 5669.0 | 4892.6 |
+| 9 | 0.5605 | 0.6406 | 0.000000 | -0.0000 | 3239.5 | 2444.5 |
+| 10 | 0.4219 | 0.5469 | 0.000000 | -0.0002 | 3338.2 | 2507.9 |
+| 11 | 0.5098 | 0.5625 | 0.000000 | -0.0002 | 2321.6 | 1510.7 |
+| 12 | 0.4785 | 0.6094 | 0.000000 | -0.0001 | 2932.4 | 2100.0 |
+### a2_rl_unitsyn_python_v3_402162
+| Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
+|------|--------|--------|-----|------|---------------|-------------|
+| 13 | 0.6406 | 0.7308 | 0.000000 | 0.0000 | 8330.8 | 7519.1 |
+| 14 | 0.4316 | 0.5625 | 0.000000 | -0.0005 | 871.8 | 0.0 |
+## Timing Analysis
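+### Average Time Breakdown (% of step time)
+> **Note**: Components are nested, so the percentages do not sum to 100%: `run_training` appears to include `fwd_logprobs_values_reward` and `train_critic_and_policy`, which in turn includes `policy_train`. `save_hf_model` is averaged only over the steps where it was recorded.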
+| Component | Avg % of Step Time |
+|-----------|-------------------|
+| wait_for_generation_buffer | 69.8% |
+| run_training | 28.2% |
+| train_critic_and_policy | 25.3% |
+| policy_train | 25.3% |
+| fwd_logprobs_values_reward | 2.9% |
+| sync_weights | 1.9% |
+| save_checkpoints | 1.1% |
+| save_hf_model | 1.1% |
+| convert_to_training_input | 0.1% |
+| cleanup_old_checkpoints | 0.0% |
+| compute_advantages_and_returns | 0.0% |
+## Cross-Log Comparison
+| Log | Avg Reward | Pass@8 | Step Time (s) | Gen Wait Time (s) | Avg Tokens | Staleness |
+|-----|------|------|------|------|------|------|
+| a2_rl_unitsyn_python_v3_402161 | 0.5037 | 0.6198 | 3494.9121 | 2682.7702 | 5676.9819 | 2.3607 |
+| a2_rl_unitsyn_python_v3_402162 | 0.5361 | 0.6466 | 4601.3180 | 3759.5732 | 5890.2442 | 1.0703 |
+## vLLM Inference Engine Analysis
+Metrics from vLLM stat loggers (V1LoggingStatLoggerFixed).
+> **Note**: Ray deduplicates similar log messages (marking them `[repeated Nx across cluster]`),
+> so stats are typically captured from only one engine per timestamp. The values shown are
+> **per-engine**; multiply by `num_inference_engines` for cluster-wide estimates.
+### Summary by Log (Per-Engine Stats)
+| Log | Avg Running/Engine | Avg Waiting/Engine | Avg Gen Throughput/Engine | Avg KV Cache % | Avg Prefix Hit % |
+|-----|-------------------|-------------------|--------------------------|----------------|------------------|
+| a2_rl_unitsyn_python_v3_402161 | 5.7 | 0.0 | 110.0 tok/s | 8.3% | 88.9% |
+| a2_rl_unitsyn_python_v3_402162 | 5.5 | 0.0 | 111.2 tok/s | 8.6% | 86.6% |
+### Utilization Analysis (Per-Engine)
+Key indicators of inference engine utilization:
+- **Running requests/engine**: Concurrent requests being processed by each engine
+- **Waiting requests**: Requests queued (0 = engine not saturated, has spare capacity)
+- **Generation throughput**: Decode tokens/sec per engine
+  - An 8B model on an H100 can reach **1000+ tok/s** when saturated
+  - If throughput is <300 tok/s with 0 waiting requests, the engine is **starved for requests**
+#### a2_rl_unitsyn_python_v3_402161
+- **Running requests/engine**: avg=5.7, max=22
+- **Waiting requests**: avg=0.0, max=0
+- **Generation throughput/engine**: avg=110.0 tok/s, max=412.1 tok/s
+- **KV cache usage**: avg=8.3%
+- **Prefix cache hit rate**: avg=88.9%
+- ⚠️ **Low throughput**: 110 tok/s << expected 1000+ tok/s for saturated 8B model
+#### a2_rl_unitsyn_python_v3_402162
+- **Running requests/engine**: avg=5.5, max=21
+- **Waiting requests**: avg=0.0, max=0
+- **Generation throughput/engine**: avg=111.2 tok/s, max=471.0 tok/s
+- **KV cache usage**: avg=8.6%
+- **Prefix cache hit rate**: avg=86.6%
+- ⚠️ **Low throughput**: 111 tok/s << expected 1000+ tok/s for saturated 8B model
+## Trial-Level Analysis (from result.json)
+Total trials parsed: 7602
+### Turn Count Statistics
+| Metric | Value |
+|--------|-------|
+| Mean | 2.1 |
+| Median | 2.0 |
+| Std | 0.5 |
+| Min | 1 |
+| Max | 7 |
+| Count | 7602 |
+### Exception Distribution
+| Exception Type | Count | % |
+|---------------|-------|---|
+| No exception | 6981 | 91.8% |
+| AgentEnvironmentTimeoutError | 476 | 6.3% |
+| DaytonaAuthenticationError | 109 | 1.4% |
+| VerifierTimeoutError | 24 | 0.3% |
+| CancelledError | 7 | 0.1% |
+| VerifierRuntimeError | 3 | 0.0% |
+| DaytonaError | 1 | 0.0% |
+| AgentTimeoutError | 1 | 0.0% |
+### Turn Count by Exception Type
+| Exception Type | Mean Turns | Median Turns | Count |
+|---------------|-----------|-------------|-------|
+| AgentTimeoutError | 6.0 | 6.0 | 1 |
+| VerifierTimeoutError | 2.4 | 2.0 | 24 |
+| No exception | 2.2 | 2.0 | 6981 |
+| DaytonaError | 2.0 | 2.0 | 1 |
+| VerifierRuntimeError | 2.0 | 2.0 | 3 |
+| AgentEnvironmentTimeoutError | 1.7 | 1.0 | 476 |
+| DaytonaAuthenticationError | 1.1 | 1.0 | 109 |
+| CancelledError | 1.0 | 1.0 | 7 |
+### Turn Count by Outcome
+| Outcome | Mean Turns | Median Turns | Count |
+|---------|-----------|-------------|-------|
+| Success | 2.1 | 2.0 | 3882 |
+| Failure | 2.2 | 2.0 | 3286 |
+### Reward Summary
+- Mean reward: 0.5416
+- Success rate: 54.2%
+- Trials with reward data: 7168

training_logs/20260429_184357_metrics_table.csv ADDED Viewed

	@@ -0,0 +1,15 @@

+log_file,async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_DaytonaAuthenticationError,batch_errors/total_DaytonaAuthenticationError,batch_errors/avg_AgentEnvironmentTimeoutError,batch_errors/total_AgentEnvironmentTimeoutError,batch_errors/avg_VerifierRuntimeError,batch_errors/total_VerifierRuntimeError,batch_errors/avg_VerifierTimeoutError,batch_errors/total_VerifierTimeoutError,timing/save_hf_model,batch_errors/avg_DaytonaError,batch_errors/total_DaytonaError,global_step
+a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,0,0.0,0,0.0,5121.6914,4576.6295,5769.2436,13905,1499,2503.5505,0.543,0.0021,0.1803,0.0,0.219,0.0,0.0,1.0,0.0,0.0323,0.6719,0.543,11.793,75.1959,379.3328,55.8,857.9687,478.6359,10.9023,0.1146,2.5571,72.0045,656.5065,728.9238,44.3353,6093.5913,52.8301,656.8044,5309.2801,0,1,128,1024,994,14,18,0.0859375,11.0,0.140625,18.0,0.0078125,1.0,,,,,,1
+a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,1,1.0,1,1.0,5871.2051,5096.5576,6791.5128,15067,1,3402.3282,0.543,-0.0001,0.1554,-0.0002,0.2129,-0.0057,0.0,1.0,0.0,0.0274,0.6875,0.543,12.9813,75.3753,375.7304,56.2,857.9687,482.2383,0.0121,0.0559,2.7428,72.8696,707.4558,780.6952,29.1329,2730.7674,52.7333,707.7694,1894.5939,0,2,64,512,464,19,44,0.046875,3.0,0.6875,44.0,,,0.015625,1.0,,,,2
+a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,2,2.0,2,1.0,5706.7969,4967.944,6318.9893,14787,1,3398.0642,0.4531,-0.0022,0.1487,0.0,0.198,0.0,0.0,1.0,0.0,0.0284,0.625,0.4531,13.4866,75.6917,379.7476,55.7,857.9687,478.2211,0.3416,0.0392,2.6589,73.1319,687.2284,760.7086,29.5563,2618.6945,52.8218,687.5371,1802.5023,0,3,64,512,491,9,20,0.015625,1.0,0.3125,20.0,,,,,,,,3
+a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,3,3.0,3,1.0,5825.2578,4940.4704,6689.5483,14273,1,3209.1042,0.4941,-0.0017,0.0659,0.0,0.2102,0.0,0.0,1.0,0.0,0.0177,0.5312,0.4941,13.7198,75.7789,383.0292,55.4,857.9687,474.9395,0.3775,0.038,2.6718,77.6872,669.8871,747.9511,28.8753,2255.3097,53.5368,670.2257,1451.1474,0,4,64,512,456,20,40,0.09375,6.0,0.625,40.0,0.03125,2.0,0.125,8.0,,,,4
+a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,4,3.75,3,1.0,5520.8398,5594.9188,5458.4856,16082,1,3484.2247,0.457,0.0055,0.1648,-0.0002,0.2057,-0.0064,0.0,1.0,0.0,0.0214,0.6094,0.457,14.6653,76.0574,385.3959,55.1,857.9687,472.5728,0.3196,0.044,2.9597,73.5587,681.9984,755.9482,29.4618,3542.6439,52.7498,682.3452,2730.9604,0,5,64,512,464,14,42,0.09375,6.0,0.65625,42.0,,,,,37.5898,,,5
+a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,5,4.125,3,1.0,5461.6738,5218.1189,5769.8894,14552,1,3328.2592,0.5586,0.014,0.1023,-0.0002,0.1969,-0.0076,0.0,1.0,0.0,0.0179,0.7031,0.5586,15.0607,76.0109,385.4834,55.1,857.9687,472.4853,0.366,0.0393,2.7127,68.8947,678.4589,747.7171,29.9495,2213.735,54.8743,678.7827,1408.4036,0,6,64,512,474,15,28,0.125,8.0,0.4375,28.0,,,0.03125,2.0,,,,6
+a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,6,4.6562,4,1.0,7798.1641,7124.3027,8179.4006,20083,1,3802.9833,0.3613,0.0039,0.1358,-0.0001,0.2261,-0.004,0.0,1.0,0.0,0.0185,0.5312,0.3613,18.1326,76.0253,380.9768,55.6,857.9687,476.9919,0.6987,0.0516,3.5227,93.4902,731.6962,825.5737,29.3423,4983.416,13.7041,732.0316,4140.5883,0,7,64,512,512,0,0,,,,,,,,,,,,7
+a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,0,0.0,0,0.0,4865.5312,4540.8412,5507.3605,14128,1386,2616.8319,0.6641,-0.0047,0.0958,-0.0,0.206,-0.0,0.0,1.0,0.0,0.0163,0.7188,0.6641,18.3456,76.1459,384.4786,55.2,857.9687,473.4901,0.3173,0.0353,2.9251,73.3734,646.7222,720.5432,29.7617,5669.0399,52.8866,647.1342,4892.6495,1,8,64,512,470,16,39,0.046875,3.0,0.609375,39.0,,,,,,,,8
+a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,1,1.0,1,1.0,5855.5039,5276.4774,6594.0844,13894,1,3271.3689,0.5605,0.0007,0.0512,-0.0,0.2096,-0.0006,0.0,1.0,0.0,0.0219,0.6406,0.5605,18.6552,76.4328,386.8148,54.9,857.9687,471.1539,0.3384,0.0375,2.7372,78.6677,660.2909,739.3591,30.4714,3239.5269,52.9167,660.6535,2444.512,1,9,64,512,432,25,70,0.125,8.0,1.078125,69.0,,,0.03125,2.0,,0.015625,1.0,9
+a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,2,2.0,2,1.0,5375.2695,5344.5093,5397.7162,14823,1,3872.2484,0.4219,0.0135,0.113,-0.0002,0.1788,-0.007,0.0,1.0,0.0,0.0154,0.5469,0.4219,18.8019,76.4187,385.8892,55.0,857.9687,472.0795,0.3767,0.0413,2.7467,73.8308,700.3324,774.57,29.1637,3338.1958,53.0117,700.6976,2507.8644,1,10,64,512,463,23,37,0.140625,9.0,0.578125,37.0,,,0.046875,3.0,36.7892,,,10
+a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,3,3.0,3,1.0,5192.8828,4679.4904,5726.7291,15138,1,3311.2403,0.5098,0.0084,0.0758,-0.0002,0.1873,-0.0056,0.0,1.0,0.0,0.0164,0.5625,0.5098,18.9517,76.4518,383.7386,55.3,857.9687,474.23,0.3045,0.0384,2.7622,74.4343,676.0857,750.8928,28.7197,2321.6045,57.2462,676.4197,1510.6996,1,11,64,512,437,26,61,0.140625,9.0,0.953125,61.0,,,0.078125,5.0,,,,11
+a2_rl_unitsyn_python_v3_402161,0.0,0,64,512,4,3.7969,3,1.0,5528.9668,5645.4653,5422.0674,16073,1,3727.4604,0.4785,0.0021,0.1344,-0.0001,0.1796,-0.0038,0.0,1.0,0.0,0.0198,0.6094,0.4785,19.0433,76.3939,383.2556,55.3,857.9687,474.7131,0.3795,0.0401,3.2865,78.4764,697.4553,776.3111,28.7288,2932.4204,52.7552,697.7943,2100.0414,1,12,16,128,110,8,15,0.0625,1.0,0.9375,15.0,,,0.125,2.0,,,,12
+a2_rl_unitsyn_python_v3_402162,0.0,0,64,512,5,1.1406,0,0.2344,5434.5586,5246.1829,5770.3587,13053,1,3046.1974,0.6406,-0.003,0.059,0.0,0.189,0.0,0.0,1.0,0.0,0.0114,0.7308,0.6406,11.8048,75.1254,372.3459,56.6,857.9687,485.6227,12.7402,0.0478,2.4902,73.844,683.1484,757.335,46.1195,8330.7966,51.8296,683.4422,7519.1416,1,13,65,520,426,31,55,0.5846153846153846,38.0,0.8461538461538461,55.0,,,0.015384615384615385,1.0,,,,13
+a2_rl_unitsyn_python_v3_402162,0.0,0,64,512,1,1.0,1,1.0,6345.9297,6347.6109,6344.6529,18003,1,4261.8702,0.4316,0.0217,0.1127,-0.0005,0.1793,-0.0156,0.0,1.0,0.0,0.0191,0.5625,0.4316,15.5447,75.3049,399.4698,53.4,857.9687,458.4989,0.3343,0.0742,3.1141,89.1038,721.9162,811.4036,33.5523,871.8394,57.2936,722.2251,0.0049,1,14,11,88,84,4,0,0.36363636363636365,4.0,,,,,,,,,,14

training_logs/20260429_184357_reward_vs_steps.png ADDED Viewed

Git LFS Details

SHA256: b02fc1b1e4e650eeb87f0d0987a2b57a4c90d0c0e41d359207a01f4e8bdc3417
Pointer size: 131 Bytes
Size of remote file: 151 kB

training_logs/20260429_184357_trial_results.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

training_logs/20260429_184357_turn_count_distribution.png ADDED Viewed

Git LFS Details

SHA256: 401a5863733e2067ea1fe3fe05c634f6893bb68c410018c8d0e718561431f766
Pointer size: 131 Bytes
Size of remote file: 109 kB

training_logs/20260429_184357_vllm_metrics_a2_rl_unitsyn_python_v3_402161.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

training_logs/20260429_184357_vllm_metrics_a2_rl_unitsyn_python_v3_402162.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

training_logs/20260429_184357_vllm_metrics_table.csv ADDED Viewed

The diff for this file is too large to render. See raw diff