| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 135.59322033898306, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 2.71, |
| "learning_rate": 0.0196, |
| "loss": 4.115, |
| "step": 10 |
| }, |
| { |
| "epoch": 5.42, |
| "learning_rate": 0.0192, |
| "loss": 4.0422, |
| "step": 20 |
| }, |
| { |
| "epoch": 8.14, |
| "learning_rate": 0.0188, |
| "loss": 3.7797, |
| "step": 30 |
| }, |
| { |
| "epoch": 10.85, |
| "learning_rate": 0.0184, |
| "loss": 3.204, |
| "step": 40 |
| }, |
| { |
| "epoch": 13.56, |
| "learning_rate": 0.018000000000000002, |
| "loss": 2.7285, |
| "step": 50 |
| }, |
| { |
| "epoch": 16.27, |
| "learning_rate": 0.0176, |
| "loss": 2.1524, |
| "step": 60 |
| }, |
| { |
| "epoch": 18.98, |
| "learning_rate": 0.0172, |
| "loss": 1.6875, |
| "step": 70 |
| }, |
| { |
| "epoch": 21.69, |
| "learning_rate": 0.0168, |
| "loss": 1.2613, |
| "step": 80 |
| }, |
| { |
| "epoch": 24.41, |
| "learning_rate": 0.016399999999999998, |
| "loss": 0.9464, |
| "step": 90 |
| }, |
| { |
| "epoch": 27.12, |
| "learning_rate": 0.016, |
| "loss": 0.734, |
| "step": 100 |
| }, |
| { |
| "epoch": 29.83, |
| "learning_rate": 0.015600000000000001, |
| "loss": 0.5502, |
| "step": 110 |
| }, |
| { |
| "epoch": 32.54, |
| "learning_rate": 0.0152, |
| "loss": 0.4353, |
| "step": 120 |
| }, |
| { |
| "epoch": 35.25, |
| "learning_rate": 0.0148, |
| "loss": 0.3286, |
| "step": 130 |
| }, |
| { |
| "epoch": 37.97, |
| "learning_rate": 0.0144, |
| "loss": 0.2814, |
| "step": 140 |
| }, |
| { |
| "epoch": 40.68, |
| "learning_rate": 0.013999999999999999, |
| "loss": 0.2337, |
| "step": 150 |
| }, |
| { |
| "epoch": 43.39, |
| "learning_rate": 0.013600000000000001, |
| "loss": 0.1949, |
| "step": 160 |
| }, |
| { |
| "epoch": 46.1, |
| "learning_rate": 0.013200000000000002, |
| "loss": 0.1482, |
| "step": 170 |
| }, |
| { |
| "epoch": 48.81, |
| "learning_rate": 0.0128, |
| "loss": 0.136, |
| "step": 180 |
| }, |
| { |
| "epoch": 51.53, |
| "learning_rate": 0.0124, |
| "loss": 0.1175, |
| "step": 190 |
| }, |
| { |
| "epoch": 54.24, |
| "learning_rate": 0.012, |
| "loss": 0.0995, |
| "step": 200 |
| }, |
| { |
| "epoch": 56.95, |
| "learning_rate": 0.0116, |
| "loss": 0.0841, |
| "step": 210 |
| }, |
| { |
| "epoch": 59.66, |
| "learning_rate": 0.011200000000000002, |
| "loss": 0.07, |
| "step": 220 |
| }, |
| { |
| "epoch": 62.37, |
| "learning_rate": 0.0108, |
| "loss": 0.0672, |
| "step": 230 |
| }, |
| { |
| "epoch": 65.08, |
| "learning_rate": 0.010400000000000001, |
| "loss": 0.058, |
| "step": 240 |
| }, |
| { |
| "epoch": 67.8, |
| "learning_rate": 0.01, |
| "loss": 0.0546, |
| "step": 250 |
| }, |
| { |
| "epoch": 70.51, |
| "learning_rate": 0.0096, |
| "loss": 0.0503, |
| "step": 260 |
| }, |
| { |
| "epoch": 73.22, |
| "learning_rate": 0.0092, |
| "loss": 0.0471, |
| "step": 270 |
| }, |
| { |
| "epoch": 75.93, |
| "learning_rate": 0.0088, |
| "loss": 0.0422, |
| "step": 280 |
| }, |
| { |
| "epoch": 78.64, |
| "learning_rate": 0.0084, |
| "loss": 0.0352, |
| "step": 290 |
| }, |
| { |
| "epoch": 81.36, |
| "learning_rate": 0.008, |
| "loss": 0.0374, |
| "step": 300 |
| }, |
| { |
| "epoch": 84.07, |
| "learning_rate": 0.0076, |
| "loss": 0.033, |
| "step": 310 |
| }, |
| { |
| "epoch": 86.78, |
| "learning_rate": 0.0072, |
| "loss": 0.0332, |
| "step": 320 |
| }, |
| { |
| "epoch": 89.49, |
| "learning_rate": 0.0068000000000000005, |
| "loss": 0.0292, |
| "step": 330 |
| }, |
| { |
| "epoch": 92.2, |
| "learning_rate": 0.0064, |
| "loss": 0.0298, |
| "step": 340 |
| }, |
| { |
| "epoch": 94.92, |
| "learning_rate": 0.006, |
| "loss": 0.0258, |
| "step": 350 |
| }, |
| { |
| "epoch": 97.63, |
| "learning_rate": 0.005600000000000001, |
| "loss": 0.0263, |
| "step": 360 |
| }, |
| { |
| "epoch": 100.34, |
| "learning_rate": 0.005200000000000001, |
| "loss": 0.0252, |
| "step": 370 |
| }, |
| { |
| "epoch": 103.05, |
| "learning_rate": 0.0048, |
| "loss": 0.0249, |
| "step": 380 |
| }, |
| { |
| "epoch": 105.76, |
| "learning_rate": 0.0044, |
| "loss": 0.0225, |
| "step": 390 |
| }, |
| { |
| "epoch": 108.47, |
| "learning_rate": 0.004, |
| "loss": 0.0219, |
| "step": 400 |
| }, |
| { |
| "epoch": 111.19, |
| "learning_rate": 0.0036, |
| "loss": 0.0224, |
| "step": 410 |
| }, |
| { |
| "epoch": 113.9, |
| "learning_rate": 0.0032, |
| "loss": 0.0238, |
| "step": 420 |
| }, |
| { |
| "epoch": 116.61, |
| "learning_rate": 0.0028000000000000004, |
| "loss": 0.0196, |
| "step": 430 |
| }, |
| { |
| "epoch": 119.32, |
| "learning_rate": 0.0024, |
| "loss": 0.0207, |
| "step": 440 |
| }, |
| { |
| "epoch": 122.03, |
| "learning_rate": 0.002, |
| "loss": 0.0208, |
| "step": 450 |
| }, |
| { |
| "epoch": 124.75, |
| "learning_rate": 0.0016, |
| "loss": 0.0204, |
| "step": 460 |
| }, |
| { |
| "epoch": 127.46, |
| "learning_rate": 0.0012, |
| "loss": 0.0207, |
| "step": 470 |
| }, |
| { |
| "epoch": 130.17, |
| "learning_rate": 0.0008, |
| "loss": 0.0206, |
| "step": 480 |
| }, |
| { |
| "epoch": 132.88, |
| "learning_rate": 0.0004, |
| "loss": 0.0203, |
| "step": 490 |
| }, |
| { |
| "epoch": 135.59, |
| "learning_rate": 0.0, |
| "loss": 0.021, |
| "step": 500 |
| } |
| ], |
| "max_steps": 500, |
| "num_train_epochs": 167, |
| "total_flos": 6.9331442466816e+16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|