Add parsed training metrics and plots
Browse files
- .gitattributes +1 -0
- training_logs/20260428_203615_metrics_a2_rl_stack_pytest_v2_387401.csv +15 -0
- training_logs/20260428_203615_metrics_report.md +231 -0
- training_logs/20260428_203615_metrics_table.csv +15 -0
- training_logs/20260428_203615_reward_vs_steps.png +3 -0
- training_logs/20260428_203615_trial_results.csv +0 -0
- training_logs/20260428_203615_turn_count_distribution.png +0 -0
- training_logs/20260428_203615_vllm_metrics_a2_rl_stack_pytest_v2_387401.csv +0 -0
- training_logs/20260428_203615_vllm_metrics_table.csv +0 -0
.gitattributes
CHANGED
|
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
training_logs/20260428_203615_reward_vs_steps.png filter=lfs diff=lfs merge=lfs -text
|
training_logs/20260428_203615_metrics_a2_rl_stack_pytest_v2_387401.csv
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_VerifierTimeoutError,batch_errors/total_VerifierTimeoutError,timing/save_hf_model,batch_errors/avg_RuntimeError,batch_errors/total_RuntimeError
|
| 2 |
+
0.0,0,64,512,0,0.0,0,0.0,4741.9902,4480.4132,4822.9386,14849,1,2194.273,0.2363,0.0057,0.1467,0.0,0.1855,0.0,0.0,1.0,0.0,0.0289,0.4375,0.2363,12.8265,73.2355,355.5015,58.6,857.9688,502.4673,10.796,0.1113,2.7561,62.0087,643.9144,706.3755,43.6553,2779.7671,53.5533,644.2551,2017.0819,0,1,128.0,1024.0,1022.0,2.0,0.0,0.015625,2.0,,,
|
| 3 |
+
0.0,0,64,512,1,1.0,1,1.0,5207.4805,4587.3176,5459.6346,15353,1,2543.871,0.2891,0.0032,0.2156,-0.0,0.1818,-0.0003,0.0,1.0,0.0,0.039,0.5,0.2891,13.6703,73.4764,355.181,58.6,857.9688,502.7878,0.0108,0.0549,3.5254,71.9423,683.3507,755.6622,31.4849,1392.6153,52.8988,683.6647,580.5066,0,2,64.0,512.0,511.0,1.0,0.0,0.015625,1.0,,,
|
| 4 |
+
0.0,0,64,512,2,2.0,2,1.0,5185.8047,4404.2423,5662.6069,18658,1,2655.8627,0.3789,0.0056,0.1026,-0.0,0.1694,-0.0,0.0,1.0,0.0,0.0307,0.4688,0.3789,16.1691,73.7953,352.0549,59.0,857.9688,505.9138,0.0087,0.0759,3.5505,84.1237,702.8472,787.3638,32.8388,1345.9698,52.8027,703.1638,502.2264,0,3,64.0,512.0,510.0,1.0,0.0,0.03125,2.0,,,
|
| 5 |
+
0.0,0,64,512,3,3.0,3,1.0,6173.4297,5171.7151,6744.9601,23599,1,2913.9495,0.3633,-0.0048,0.1501,0.0,0.1671,0.0,0.0,1.0,0.0,0.0269,0.5781,0.3633,19.7277,74.2299,348.8676,59.3,857.9688,509.1012,0.0089,0.0556,4.1262,92.6868,765.1808,858.2347,33.0996,2000.5081,53.4025,765.4919,1084.7149,0,4,64.0,512.0,512.0,0.0,0.0,,,,,
|
| 6 |
+
0.0,0,64,512,4,4.0,4,1.0,6649.6289,5533.7975,7170.7765,21118,1405,3018.764,0.3184,0.002,0.0977,-0.0,0.1669,-0.0,0.0,1.0,0.0,0.0231,0.4062,0.3184,20.1003,74.4495,353.0518,58.9,857.9688,504.9169,0.0081,0.052,3.8742,89.0204,756.9617,846.374,33.0429,1451.9746,52.8114,757.3012,548.8843,0,5,64.0,512.0,512.0,0.0,0.0,,,36.8234,,
|
| 7 |
+
0.0,0,64,512,5,5.0,5,1.0,6952.5547,6301.3432,7273.414,20095,1889,2871.2745,0.3301,0.0065,0.1201,-0.0,0.1707,-0.0,0.0,1.0,0.0,0.0208,0.4688,0.3301,20.2417,74.4911,352.5635,58.9,857.9688,505.4053,0.008,0.0486,3.8545,91.1339,754.2028,845.7448,31.6035,1824.3834,56.2481,754.562,918.5016,0,6,64.0,512.0,512.0,0.0,0.0,,,,,
|
| 8 |
+
0.0,0,64,512,6,6.0,6,1.0,7821.9141,6336.3427,8397.6233,28454,2297,3823.9451,0.2793,-0.0032,0.1026,-0.0,0.1755,-0.0,0.0,1.0,0.0,0.0234,0.4062,0.2793,24.1808,74.7747,405.2173,52.8,857.9688,452.7515,0.011,0.0608,4.7397,122.4595,855.6703,978.5429,33.4132,4135.5872,14.0068,856.0222,3138.2634,0,7,64.0,512.0,512.0,0.0,0.0,,,,,
|
| 9 |
+
0.0,0,64,512,0,0.0,0,0.0,5879.5469,5305.2657,6269.3049,14895,1513,2089.5078,0.4043,0.0018,0.096,-0.0,0.1721,-0.0,0.0,1.0,0.0,0.0209,0.5156,0.4043,24.3558,75.0631,401.2662,53.2,857.9688,456.7026,0.01,0.0396,3.3153,69.9986,642.9688,713.3496,32.0209,3348.3309,52.4998,643.311,2579.128,1,8,64.0,512.0,512.0,0.0,0.0,,,,,
|
| 10 |
+
0.0,0,64,512,1,1.0,1,1.0,6734.0,6444.1044,6893.8818,18643,2175,2384.6491,0.3555,0.0017,0.1221,-0.0,0.1782,-0.0,0.0,1.0,0.0,0.0215,0.4375,0.3555,24.6022,75.2357,411.7228,52.0,857.9688,446.246,0.009,0.0439,3.6223,78.3919,702.1173,780.9468,31.0012,1826.3469,52.3499,702.5107,989.3979,1,9,64.0,512.0,509.0,2.0,0.0,0.046875,3.0,,,
|
| 11 |
+
0.0,0,64,512,2,2.0,2,1.0,6511.9746,5525.2532,7336.0108,18605,1,2862.176,0.4551,0.0042,0.1035,-0.0001,0.1794,-0.002,0.0,1.0,0.0,0.0187,0.5469,0.4551,24.6203,75.1738,409.3499,52.3,857.9688,448.6188,0.0111,0.044,3.5639,90.4193,734.5286,825.4112,31.3292,1651.042,52.4615,734.9477,769.5638,1,10,64.0,512.0,512.0,0.0,0.0,,,36.5961,,
|
| 12 |
+
0.0,0,64,512,3,3.0,3,1.0,7150.2969,5831.9757,8037.7941,22262,1340,2965.0407,0.4023,0.0008,0.1053,-0.0,0.1771,-0.0,0.0,1.0,0.0,0.0194,0.5156,0.4023,24.8219,75.2715,408.2212,52.4,857.9688,449.7476,0.0086,0.0515,4.1267,95.6788,762.1538,858.3374,32.4094,2182.5423,51.6537,762.6066,1268.3906,1,11,64.0,512.0,511.0,1.0,0.0,0.015625,1.0,,,
|
| 13 |
+
0.0,0,64,512,4,4.0,4,1.0,7161.0645,6208.9091,7530.0569,19269,1,2859.9143,0.2793,-0.0043,0.1026,0.0,0.1809,0.0,0.0,1.0,0.0,0.0175,0.3906,0.2793,24.8525,75.2184,415.8303,51.5,857.9688,442.1384,0.0083,0.0535,3.7269,95.4867,753.3097,849.3001,32.3294,1734.4666,56.6358,753.7596,824.7671,1,12,64.0,512.0,509.0,2.0,0.0,0.03125,2.0,,0.015625,1.0
|
| 14 |
+
0.0,0,64,512,5,5.0,5,1.0,7395.9434,6484.1768,7894.5227,19744,1,2920.1255,0.3535,0.0007,0.1193,0.0,0.1803,0.0,0.0,1.0,0.0,0.0186,0.4844,0.3535,24.9543,75.2349,412.6592,51.9,857.9688,445.3096,0.0093,0.05,3.7965,93.9943,736.3408,830.7211,32.1481,1894.2929,59.5623,736.6765,1000.1749,1,13,64.0,512.0,512.0,0.0,0.0,,,,,
|
| 15 |
+
0.0,0,64,512,6,6.0,6,1.0,8946.791,7245.6212,9537.7237,29403,2237,4155.2128,0.2578,0.002,0.073,0.0,0.1889,0.0,0.0,1.0,0.0,0.0133,0.3438,0.2578,26.1139,75.4758,416.4844,51.5,857.9688,441.4843,0.0095,0.0622,4.9616,132.9495,904.4659,1037.814,32.4082,7012.0493,13.8671,904.8018,5955.3685,1,14,,,,,,,,,,
|
training_logs/20260428_203615_metrics_report.md
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SkyRL Training Metrics Analysis
|
| 2 |
+
|
| 3 |
+
Generated from 1 log file
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
| Log File | Total Steps | Metric Blocks | Final Reward (mean) | Final Reward (max) | Total Time (s) |
|
| 8 |
+
|----------|-------------|---------------|---------------------|-------------------|----------------|
|
| 9 |
+
| a2_rl_stack_pytest_v2_387401 | 14 | 14 | 0.3359 | 0.4551 | 34579.9 |
|
| 10 |
+
|
| 11 |
+
## Async Metrics
|
| 12 |
+
|
| 13 |
+
| | Mean | Std | Min | Max | Count |
|
| 14 |
+
|:------------------------------|-----------:|---------:|------:|------:|--------:|
|
| 15 |
+
| async/discard_rate | 0 | 0 | 0 | 0 | 14 |
|
| 16 |
+
| async/discarded_count | 0 | 0 | 0 | 0 | 14 |
|
| 17 |
+
| async/effective_batch_groups | 64 | 0 | 64 | 64 | 14 |
|
| 18 |
+
| async/effective_batch_samples | 512 | 0 | 512 | 512 | 14 |
|
| 19 |
+
| async/staleness_max | 3 | 2.0755 | 0 | 6 | 14 |
|
| 20 |
+
| async/staleness_mean | 3 | 2.0755 | 0 | 6 | 14 |
|
| 21 |
+
| async/staleness_min | 3 | 2.0755 | 0 | 6 | 14 |
|
| 22 |
+
| async/staleness_ratio | 0.857143 | 0.363137 | 0 | 1 | 14 |
|
| 23 |
+
|
| 24 |
+
## Generate Metrics
|
| 25 |
+
|
| 26 |
+
| | Mean | Std | Min | Max | Count |
|
| 27 |
+
|:-------------------------------------|----------:|---------:|---------:|---------:|--------:|
|
| 28 |
+
| generate/avg_num_tokens | 6608.03 | 1126.45 | 4741.99 | 8946.79 | 14 |
|
| 29 |
+
| generate/avg_tokens_non_zero_rewards | 5704.32 | 854.75 | 4404.24 | 7245.62 | 14 |
|
| 30 |
+
| generate/avg_tokens_zero_rewards | 7073.66 | 1246.64 | 4822.94 | 9537.72 | 14 |
|
| 31 |
+
| generate/max_num_tokens | 20353.4 | 4453.55 | 14849 | 29403 | 14 |
|
| 32 |
+
| generate/min_num_tokens | 918.786 | 993.044 | 1 | 2297 | 14 |
|
| 33 |
+
| generate/std_num_tokens | 2875.61 | 558.202 | 2089.51 | 4155.21 | 14 |
|
| 34 |
+
|
| 35 |
+
## Loss Metrics
|
| 36 |
+
|
| 37 |
+
| | Mean | Std | Min | Max | Count |
|
| 38 |
+
|:----------------------------|-----------:|-----------:|--------:|-------:|--------:|
|
| 39 |
+
| loss/avg_final_rewards | 0.335943 | 0.0629694 | 0.2363 | 0.4551 | 14 |
|
| 40 |
+
| loss/avg_raw_advantages | 0.00156429 | 0.00358149 | -0.0048 | 0.0065 | 14 |
|
| 41 |
+
| loss/avg_raw_advantages_abs | 0.118371 | 0.0344238 | 0.073 | 0.2156 | 14 |
|
| 42 |
+
|
| 43 |
+
## Policy Metrics
|
| 44 |
+
|
| 45 |
+
| | Mean | Std | Min | Max | Count |
|
| 46 |
+
|:---------------------------|-------------:|------------:|--------:|-------:|--------:|
|
| 47 |
+
| policy/final_loss | -7.14286e-06 | 2.67261e-05 | -0.0001 | 0 | 14 |
|
| 48 |
+
| policy/policy_entropy | 0.1767 | 0.00675209 | 0.1669 | 0.1889 | 14 |
|
| 49 |
+
| policy/policy_loss | -0.000164286 | 0.000534368 | -0.002 | 0 | 14 |
|
| 50 |
+
| policy/policy_lr | 0 | 0 | 0 | 0 | 14 |
|
| 51 |
+
| policy/policy_update_steps | 1 | 0 | 1 | 1 | 14 |
|
| 52 |
+
| policy/ppo_clip_ratio | 0 | 0 | 0 | 0 | 14 |
|
| 53 |
+
| policy/raw_grad_norm | 0.02305 | 0.00651681 | 0.0133 | 0.039 | 14 |
|
| 54 |
+
|
| 55 |
+
## Reward Metrics
|
| 56 |
+
|
| 57 |
+
| | Mean | Std | Min | Max | Count |
|
| 58 |
+
|:----------------------|---------:|----------:|-------:|-------:|--------:|
|
| 59 |
+
| reward/avg_pass_at_8 | 0.464286 | 0.0649824 | 0.3438 | 0.5781 | 14 |
|
| 60 |
+
| reward/avg_raw_reward | 0.335943 | 0.0629694 | 0.2363 | 0.4551 | 14 |
|
| 61 |
+
|
| 62 |
+
## System Metrics
|
| 63 |
+
|
| 64 |
+
| | Mean | Std | Min | Max | Count |
|
| 65 |
+
|:------------------------|---------:|-------------:|---------:|---------:|--------:|
|
| 66 |
+
| system/process_rss_gb | 21.517 | 4.48783 | 12.8265 | 26.1139 | 14 |
|
| 67 |
+
| system/process_vms_gb | 74.6518 | 0.727882 | 73.2355 | 75.4758 | 14 |
|
| 68 |
+
| system/ram_available_gb | 385.569 | 29.6712 | 348.868 | 416.484 | 14 |
|
| 69 |
+
| system/ram_percent | 55.0643 | 3.46424 | 51.5 | 59.3 | 14 |
|
| 70 |
+
| system/ram_total_gb | 857.969 | 2.35957e-13 | 857.969 | 857.969 | 14 |
|
| 71 |
+
| system/ram_used_gb | 472.399 | 29.6712 | 441.484 | 509.101 | 14 |
|
| 72 |
+
|
| 73 |
+
## Timing Metrics
|
| 74 |
+
|
| 75 |
+
| | Mean | Std | Min | Max | Count |
|
| 76 |
+
|:--------------------------------------|-------------:|-------------:|----------:|----------:|--------:|
|
| 77 |
+
| timing/cleanup_old_checkpoints | 0.779807 | 2.88286 | 0.008 | 10.796 | 14 |
|
| 78 |
+
| timing/compute_advantages_and_returns | 0.0574143 | 0.0179349 | 0.0396 | 0.1113 | 14 |
|
| 79 |
+
| timing/convert_to_training_input | 3.82427 | 0.55475 | 2.7561 | 4.9616 | 14 |
|
| 80 |
+
| timing/fwd_logprobs_values_reward | 90.7353 | 18.9282 | 62.0087 | 132.95 | 14 |
|
| 81 |
+
| timing/policy_train | 742.715 | 71.7004 | 642.969 | 904.466 | 14 |
|
| 82 |
+
| timing/run_training | 833.87 | 90.3807 | 706.375 | 1037.81 | 14 |
|
| 83 |
+
| timing/save_checkpoints | 33.056 | 3.13253 | 31.0012 | 43.6553 | 14 |
|
| 84 |
+
| timing/step | 2469.99 | 1529.45 | 1345.97 | 7012.05 | 14 |
|
| 85 |
+
| timing/sync_weights | 48.1967 | 14.6734 | 13.8671 | 59.5623 | 14 |
|
| 86 |
+
| timing/train_critic_and_policy | 743.077 | 71.7036 | 643.311 | 904.802 | 14 |
|
| 87 |
+
| timing/wait_for_generation_buffer | 1584.07 | 1485.24 | 502.226 | 5955.37 | 14 |
|
| 88 |
+
| timing/save_hf_model | 36.7097 | 0.160725 | 36.5961 | 36.8234 | 2 |
|
| 89 |
+
|
| 90 |
+
## Trainer Metrics
|
| 91 |
+
|
| 92 |
+
| | Mean | Std | Min | Max | Count |
|
| 93 |
+
|:--------------------|-------:|---------:|------:|------:|--------:|
|
| 94 |
+
| trainer/epoch | 0.5 | 0.518875 | 0 | 1 | 14 |
|
| 95 |
+
| trainer/global_step | 7.5 | 4.1833 | 1 | 14 | 14 |
|
| 96 |
+
|
| 97 |
+
## Batch Errors Metrics
|
| 98 |
+
|
| 99 |
+
| | Mean | Std | Min | Max | Count |
|
| 100 |
+
|:----------------------------------------|------------:|------------:|-----------:|------------:|--------:|
|
| 101 |
+
| batch_errors/total_batches | 68.9231 | 17.7504 | 64 | 128 | 13 |
|
| 102 |
+
| batch_errors/total_instances | 551.385 | 142.003 | 512 | 1024 | 13 |
|
| 103 |
+
| batch_errors/total_successful | 550.462 | 141.684 | 509 | 1022 | 13 |
|
| 104 |
+
| batch_errors/total_failed | 0.692308 | 0.85485 | 0 | 2 | 13 |
|
| 105 |
+
| batch_errors/total_masked | 0 | 0 | 0 | 0 | 13 |
|
| 106 |
+
| batch_errors/avg_VerifierTimeoutError | 0.0260417 | 0.0127578 | 0.015625 | 0.046875 | 6 |
|
| 107 |
+
| batch_errors/total_VerifierTimeoutError | 1.83333 | 0.752773 | 1 | 3 | 6 |
|
| 108 |
+
| batch_errors/avg_RuntimeError | 0.015625 | nan | 0.015625 | 0.015625 | 1 |
|
| 109 |
+
| batch_errors/total_RuntimeError | 1 | nan | 1 | 1 | 1 |
|
| 110 |
+
|
| 111 |
+
## Training Progression by Log
|
| 112 |
+
|
| 113 |
+
### a2_rl_stack_pytest_v2_387401
|
| 114 |
+
|
| 115 |
+
| Step | Reward | Pass@8 | KL | Loss | Step Time (s) | Gen Wait (s) |
|
| 116 |
+
|------|--------|--------|-----|------|---------------|-------------|
|
| 117 |
+
| 1 | 0.2363 | 0.4375 | 0.000000 | 0.0000 | 2779.8 | 2017.1 |
|
| 118 |
+
| 2 | 0.2891 | 0.5000 | 0.000000 | -0.0000 | 1392.6 | 580.5 |
|
| 119 |
+
| 3 | 0.3789 | 0.4688 | 0.000000 | -0.0000 | 1346.0 | 502.2 |
|
| 120 |
+
| 4 | 0.3633 | 0.5781 | 0.000000 | 0.0000 | 2000.5 | 1084.7 |
|
| 121 |
+
| 5 | 0.3184 | 0.4062 | 0.000000 | -0.0000 | 1452.0 | 548.9 |
|
| 122 |
+
| 6 | 0.3301 | 0.4688 | 0.000000 | -0.0000 | 1824.4 | 918.5 |
|
| 123 |
+
| 7 | 0.2793 | 0.4062 | 0.000000 | -0.0000 | 4135.6 | 3138.3 |
|
| 124 |
+
| 8 | 0.4043 | 0.5156 | 0.000000 | -0.0000 | 3348.3 | 2579.1 |
|
| 125 |
+
| 9 | 0.3555 | 0.4375 | 0.000000 | -0.0000 | 1826.3 | 989.4 |
|
| 126 |
+
| 10 | 0.4551 | 0.5469 | 0.000000 | -0.0001 | 1651.0 | 769.6 |
|
| 127 |
+
| 11 | 0.4023 | 0.5156 | 0.000000 | -0.0000 | 2182.5 | 1268.4 |
|
| 128 |
+
| 12 | 0.2793 | 0.3906 | 0.000000 | 0.0000 | 1734.5 | 824.8 |
|
| 129 |
+
| 13 | 0.3535 | 0.4844 | 0.000000 | 0.0000 | 1894.3 | 1000.2 |
|
| 130 |
+
| 14 | 0.2578 | 0.3438 | 0.000000 | 0.0000 | 7012.0 | 5955.4 |
|
| 131 |
+
|
| 132 |
+
## Timing Analysis
|
| 133 |
+
|
| 134 |
+
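### Average Time Breakdown (% of step time)

> **Note**: Components appear to be nested (e.g. `run_training` ≈ `train_critic_and_policy` + `fwd_logprobs_values_reward`), so the percentages are not expected to sum to 100%. `save_hf_model` was recorded on only 2 of the 14 steps, and its percentage appears to be averaged over those steps only.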
|
| 135 |
+
|
| 136 |
+
| Component | Avg % of Step Time |
|
| 137 |
+
|-----------|-------------------|
|
| 138 |
+
| wait_for_generation_buffer | 56.5% |
|
| 139 |
+
| run_training | 40.7% |
|
| 140 |
+
| train_critic_and_policy | 36.4% |
|
| 141 |
+
| policy_train | 36.4% |
|
| 142 |
+
| fwd_logprobs_values_reward | 4.4% |
|
| 143 |
+
| sync_weights | 2.6% |
|
| 144 |
+
| save_hf_model | 2.4% |
|
| 145 |
+
| save_checkpoints | 1.6% |
|
| 146 |
+
| convert_to_training_input | 0.2% |
|
| 147 |
+
| cleanup_old_checkpoints | 0.0% |
|
| 148 |
+
| compute_advantages_and_returns | 0.0% |
|
| 149 |
+
|
| 150 |
+
## vLLM Inference Engine Analysis
|
| 151 |
+
|
| 152 |
+
Metrics from vLLM stat loggers (V1LoggingStatLoggerFixed).
|
| 153 |
+
|
| 154 |
+
> **Note**: Ray deduplicates similar log messages with `[repeated Nx across cluster]`,
|
| 155 |
+
> so we typically capture stats from one engine per timestamp. The stats shown are
|
| 156 |
+
> **per-engine** values. Multiply by num_inference_engines for cluster-wide estimates.
|
| 157 |
+
|
| 158 |
+
### Summary by Log (Per-Engine Stats)
|
| 159 |
+
|
| 160 |
+
| Log | Avg Running/Engine | Avg Waiting/Engine | Avg Gen Throughput/Engine | Avg KV Cache % | Avg Prefix Hit % |
|
| 161 |
+
|-----|-------------------|-------------------|--------------------------|----------------|------------------|
|
| 162 |
+
| a2_rl_stack_pytest_v2_387401 | 3.5 | 0.0 | 69.6 tok/s | 5.9% | 85.6% |
|
| 163 |
+
|
| 164 |
+
### Utilization Analysis (Per-Engine)
|
| 165 |
+
|
| 166 |
+
Key indicators of inference engine utilization:
|
| 167 |
+
|
| 168 |
+
- **Running requests/engine**: Concurrent requests being processed by each engine
|
| 169 |
+
- **Waiting requests**: Requests queued (0 = engine not saturated, has spare capacity)
|
| 170 |
+
- **Generation throughput**: Decode tokens/sec per engine
|
| 171 |
+
- An 8B model on an H100 can reach **1000+ tok/s** when saturated
|
| 172 |
+
- If throughput is below 300 tok/s with 0 requests waiting, the engine is **starved for requests**
|
| 173 |
+
|
| 174 |
+
#### a2_rl_stack_pytest_v2_387401
|
| 175 |
+
|
| 176 |
+
- **Running requests/engine**: avg=3.5, max=14
|
| 177 |
+
- **Waiting requests**: avg=0.0, max=0
|
| 178 |
+
- **Generation throughput/engine**: avg=69.6 tok/s, max=319.7 tok/s
|
| 179 |
+
- **KV cache usage**: avg=5.9%
|
| 180 |
+
- **Prefix cache hit rate**: avg=85.6%
|
| 181 |
+
- ⚠️ **Underutilized**: Engines starved for requests (0 waiting, avg 3.5 running)
|
| 182 |
+
- Bottleneck is likely upstream (environment execution, not inference)
|
| 183 |
+
|
| 184 |
+
## Trial-Level Analysis (from result.json)
|
| 185 |
+
|
| 186 |
+
Total trials parsed: 7168
|
| 187 |
+
|
| 188 |
+
### Turn Count Statistics
|
| 189 |
+
|
| 190 |
+
| Metric | Value |
|
| 191 |
+
|--------|-------|
|
| 192 |
+
| Mean | 3.3 |
|
| 193 |
+
| Median | 3.0 |
|
| 194 |
+
| Std | 1.6 |
|
| 195 |
+
| Min | 2 |
|
| 196 |
+
| Max | 19 |
|
| 197 |
+
| Count | 7168 |
|
| 198 |
+
|
| 199 |
+
### Exception Distribution
|
| 200 |
+
|
| 201 |
+
| Exception Type | Count | % |
|
| 202 |
+
|---------------|-------|---|
|
| 203 |
+
| No exception | 7143 | 99.7% |
|
| 204 |
+
| AgentTimeoutError | 12 | 0.2% |
|
| 205 |
+
| VerifierTimeoutError | 11 | 0.2% |
|
| 206 |
+
| RuntimeError | 1 | 0.0% |
|
| 207 |
+
| ContextLengthExceededError | 1 | 0.0% |
|
| 208 |
+
|
| 209 |
+
### Turn Count by Exception Type
|
| 210 |
+
|
| 211 |
+
| Exception Type | Mean Turns | Median Turns | Count |
|
| 212 |
+
|---------------|-----------|-------------|-------|
|
| 213 |
+
| ContextLengthExceededError | 15.0 | 15.0 | 1 |
|
| 214 |
+
| AgentTimeoutError | 9.3 | 9.5 | 12 |
|
| 215 |
+
| VerifierTimeoutError | 4.4 | 4.0 | 11 |
|
| 216 |
+
| RuntimeError | 4.0 | 4.0 | 1 |
|
| 217 |
+
| No exception | 3.3 | 3.0 | 7143 |
|
| 218 |
+
|
| 219 |
+
### Turn Count by Outcome
|
| 220 |
+
|
| 221 |
+
| Outcome | Mean Turns | Median Turns | Count |
|
| 222 |
+
|---------|-----------|-------------|-------|
|
| 223 |
+
| Success | 2.9 | 3.0 | 2408 |
|
| 224 |
+
| Failure | 3.5 | 3.0 | 4748 |
|
| 225 |
+
|
| 226 |
+
### Reward Summary
|
| 227 |
+
|
| 228 |
+
- Mean reward: 0.3365
|
| 229 |
+
- Success rate: 33.7%
|
| 230 |
+
- Trials with reward data: 7156
|
| 231 |
+
|
training_logs/20260428_203615_metrics_table.csv
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
log_file,async/discard_rate,async/discarded_count,async/effective_batch_groups,async/effective_batch_samples,async/staleness_max,async/staleness_mean,async/staleness_min,async/staleness_ratio,generate/avg_num_tokens,generate/avg_tokens_non_zero_rewards,generate/avg_tokens_zero_rewards,generate/max_num_tokens,generate/min_num_tokens,generate/std_num_tokens,loss/avg_final_rewards,loss/avg_raw_advantages,loss/avg_raw_advantages_abs,policy/final_loss,policy/policy_entropy,policy/policy_loss,policy/policy_lr,policy/policy_update_steps,policy/ppo_clip_ratio,policy/raw_grad_norm,reward/avg_pass_at_8,reward/avg_raw_reward,system/process_rss_gb,system/process_vms_gb,system/ram_available_gb,system/ram_percent,system/ram_total_gb,system/ram_used_gb,timing/cleanup_old_checkpoints,timing/compute_advantages_and_returns,timing/convert_to_training_input,timing/fwd_logprobs_values_reward,timing/policy_train,timing/run_training,timing/save_checkpoints,timing/step,timing/sync_weights,timing/train_critic_and_policy,timing/wait_for_generation_buffer,trainer/epoch,trainer/global_step,batch_errors/total_batches,batch_errors/total_instances,batch_errors/total_successful,batch_errors/total_failed,batch_errors/total_masked,batch_errors/avg_VerifierTimeoutError,batch_errors/total_VerifierTimeoutError,timing/save_hf_model,batch_errors/avg_RuntimeError,batch_errors/total_RuntimeError,global_step
|
| 2 |
+
a2_rl_stack_pytest_v2_387401,0.0,0,64,512,0,0.0,0,0.0,4741.9902,4480.4132,4822.9386,14849,1,2194.273,0.2363,0.0057,0.1467,0.0,0.1855,0.0,0.0,1.0,0.0,0.0289,0.4375,0.2363,12.8265,73.2355,355.5015,58.6,857.9688,502.4673,10.796,0.1113,2.7561,62.0087,643.9144,706.3755,43.6553,2779.7671,53.5533,644.2551,2017.0819,0,1,128.0,1024.0,1022.0,2.0,0.0,0.015625,2.0,,,,1
|
| 3 |
+
a2_rl_stack_pytest_v2_387401,0.0,0,64,512,1,1.0,1,1.0,5207.4805,4587.3176,5459.6346,15353,1,2543.871,0.2891,0.0032,0.2156,-0.0,0.1818,-0.0003,0.0,1.0,0.0,0.039,0.5,0.2891,13.6703,73.4764,355.181,58.6,857.9688,502.7878,0.0108,0.0549,3.5254,71.9423,683.3507,755.6622,31.4849,1392.6153,52.8988,683.6647,580.5066,0,2,64.0,512.0,511.0,1.0,0.0,0.015625,1.0,,,,2
|
| 4 |
+
a2_rl_stack_pytest_v2_387401,0.0,0,64,512,2,2.0,2,1.0,5185.8047,4404.2423,5662.6069,18658,1,2655.8627,0.3789,0.0056,0.1026,-0.0,0.1694,-0.0,0.0,1.0,0.0,0.0307,0.4688,0.3789,16.1691,73.7953,352.0549,59.0,857.9688,505.9138,0.0087,0.0759,3.5505,84.1237,702.8472,787.3638,32.8388,1345.9698,52.8027,703.1638,502.2264,0,3,64.0,512.0,510.0,1.0,0.0,0.03125,2.0,,,,3
|
| 5 |
+
a2_rl_stack_pytest_v2_387401,0.0,0,64,512,3,3.0,3,1.0,6173.4297,5171.7151,6744.9601,23599,1,2913.9495,0.3633,-0.0048,0.1501,0.0,0.1671,0.0,0.0,1.0,0.0,0.0269,0.5781,0.3633,19.7277,74.2299,348.8676,59.3,857.9688,509.1012,0.0089,0.0556,4.1262,92.6868,765.1808,858.2347,33.0996,2000.5081,53.4025,765.4919,1084.7149,0,4,64.0,512.0,512.0,0.0,0.0,,,,,,4
|
| 6 |
+
a2_rl_stack_pytest_v2_387401,0.0,0,64,512,4,4.0,4,1.0,6649.6289,5533.7975,7170.7765,21118,1405,3018.764,0.3184,0.002,0.0977,-0.0,0.1669,-0.0,0.0,1.0,0.0,0.0231,0.4062,0.3184,20.1003,74.4495,353.0518,58.9,857.9688,504.9169,0.0081,0.052,3.8742,89.0204,756.9617,846.374,33.0429,1451.9746,52.8114,757.3012,548.8843,0,5,64.0,512.0,512.0,0.0,0.0,,,36.8234,,,5
|
| 7 |
+
a2_rl_stack_pytest_v2_387401,0.0,0,64,512,5,5.0,5,1.0,6952.5547,6301.3432,7273.414,20095,1889,2871.2745,0.3301,0.0065,0.1201,-0.0,0.1707,-0.0,0.0,1.0,0.0,0.0208,0.4688,0.3301,20.2417,74.4911,352.5635,58.9,857.9688,505.4053,0.008,0.0486,3.8545,91.1339,754.2028,845.7448,31.6035,1824.3834,56.2481,754.562,918.5016,0,6,64.0,512.0,512.0,0.0,0.0,,,,,,6
|
| 8 |
+
a2_rl_stack_pytest_v2_387401,0.0,0,64,512,6,6.0,6,1.0,7821.9141,6336.3427,8397.6233,28454,2297,3823.9451,0.2793,-0.0032,0.1026,-0.0,0.1755,-0.0,0.0,1.0,0.0,0.0234,0.4062,0.2793,24.1808,74.7747,405.2173,52.8,857.9688,452.7515,0.011,0.0608,4.7397,122.4595,855.6703,978.5429,33.4132,4135.5872,14.0068,856.0222,3138.2634,0,7,64.0,512.0,512.0,0.0,0.0,,,,,,7
|
| 9 |
+
a2_rl_stack_pytest_v2_387401,0.0,0,64,512,0,0.0,0,0.0,5879.5469,5305.2657,6269.3049,14895,1513,2089.5078,0.4043,0.0018,0.096,-0.0,0.1721,-0.0,0.0,1.0,0.0,0.0209,0.5156,0.4043,24.3558,75.0631,401.2662,53.2,857.9688,456.7026,0.01,0.0396,3.3153,69.9986,642.9688,713.3496,32.0209,3348.3309,52.4998,643.311,2579.128,1,8,64.0,512.0,512.0,0.0,0.0,,,,,,8
|
| 10 |
+
a2_rl_stack_pytest_v2_387401,0.0,0,64,512,1,1.0,1,1.0,6734.0,6444.1044,6893.8818,18643,2175,2384.6491,0.3555,0.0017,0.1221,-0.0,0.1782,-0.0,0.0,1.0,0.0,0.0215,0.4375,0.3555,24.6022,75.2357,411.7228,52.0,857.9688,446.246,0.009,0.0439,3.6223,78.3919,702.1173,780.9468,31.0012,1826.3469,52.3499,702.5107,989.3979,1,9,64.0,512.0,509.0,2.0,0.0,0.046875,3.0,,,,9
|
| 11 |
+
a2_rl_stack_pytest_v2_387401,0.0,0,64,512,2,2.0,2,1.0,6511.9746,5525.2532,7336.0108,18605,1,2862.176,0.4551,0.0042,0.1035,-0.0001,0.1794,-0.002,0.0,1.0,0.0,0.0187,0.5469,0.4551,24.6203,75.1738,409.3499,52.3,857.9688,448.6188,0.0111,0.044,3.5639,90.4193,734.5286,825.4112,31.3292,1651.042,52.4615,734.9477,769.5638,1,10,64.0,512.0,512.0,0.0,0.0,,,36.5961,,,10
|
| 12 |
+
a2_rl_stack_pytest_v2_387401,0.0,0,64,512,3,3.0,3,1.0,7150.2969,5831.9757,8037.7941,22262,1340,2965.0407,0.4023,0.0008,0.1053,-0.0,0.1771,-0.0,0.0,1.0,0.0,0.0194,0.5156,0.4023,24.8219,75.2715,408.2212,52.4,857.9688,449.7476,0.0086,0.0515,4.1267,95.6788,762.1538,858.3374,32.4094,2182.5423,51.6537,762.6066,1268.3906,1,11,64.0,512.0,511.0,1.0,0.0,0.015625,1.0,,,,11
|
| 13 |
+
a2_rl_stack_pytest_v2_387401,0.0,0,64,512,4,4.0,4,1.0,7161.0645,6208.9091,7530.0569,19269,1,2859.9143,0.2793,-0.0043,0.1026,0.0,0.1809,0.0,0.0,1.0,0.0,0.0175,0.3906,0.2793,24.8525,75.2184,415.8303,51.5,857.9688,442.1384,0.0083,0.0535,3.7269,95.4867,753.3097,849.3001,32.3294,1734.4666,56.6358,753.7596,824.7671,1,12,64.0,512.0,509.0,2.0,0.0,0.03125,2.0,,0.015625,1.0,12
|
| 14 |
+
a2_rl_stack_pytest_v2_387401,0.0,0,64,512,5,5.0,5,1.0,7395.9434,6484.1768,7894.5227,19744,1,2920.1255,0.3535,0.0007,0.1193,0.0,0.1803,0.0,0.0,1.0,0.0,0.0186,0.4844,0.3535,24.9543,75.2349,412.6592,51.9,857.9688,445.3096,0.0093,0.05,3.7965,93.9943,736.3408,830.7211,32.1481,1894.2929,59.5623,736.6765,1000.1749,1,13,64.0,512.0,512.0,0.0,0.0,,,,,,13
|
| 15 |
+
a2_rl_stack_pytest_v2_387401,0.0,0,64,512,6,6.0,6,1.0,8946.791,7245.6212,9537.7237,29403,2237,4155.2128,0.2578,0.002,0.073,0.0,0.1889,0.0,0.0,1.0,0.0,0.0133,0.3438,0.2578,26.1139,75.4758,416.4844,51.5,857.9688,441.4843,0.0095,0.0622,4.9616,132.9495,904.4659,1037.814,32.4082,7012.0493,13.8671,904.8018,5955.3685,1,14,,,,,,,,,,,14
|
training_logs/20260428_203615_reward_vs_steps.png
ADDED
|
Git LFS Details
|
training_logs/20260428_203615_trial_results.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_logs/20260428_203615_turn_count_distribution.png
ADDED
|
training_logs/20260428_203615_vllm_metrics_a2_rl_stack_pytest_v2_387401.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_logs/20260428_203615_vllm_metrics_table.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|