{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 1000,
  "global_step": 3180,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.047175374454534735,
      "grad_norm": 0.6658920049667358,
      "learning_rate": 2.0545073375262055e-06,
      "loss": 0.7831,
      "step": 50
    },
    {
      "epoch": 0.09435074890906947,
      "grad_norm": 0.6541114449501038,
      "learning_rate": 4.150943396226416e-06,
      "loss": 0.6891,
      "step": 100
    },
    {
      "epoch": 0.1415261233636042,
      "grad_norm": 0.64142906665802,
      "learning_rate": 6.247379454926625e-06,
      "loss": 0.6727,
      "step": 150
    },
    {
      "epoch": 0.18870149781813894,
      "grad_norm": 0.645492434501648,
      "learning_rate": 8.343815513626834e-06,
      "loss": 0.6608,
      "step": 200
    },
    {
      "epoch": 0.23587687227267368,
      "grad_norm": 0.6750236749649048,
      "learning_rate": 1.0440251572327045e-05,
      "loss": 0.6656,
      "step": 250
    },
    {
      "epoch": 0.2830522467272084,
      "grad_norm": 0.6192681789398193,
      "learning_rate": 1.2536687631027256e-05,
      "loss": 0.6613,
      "step": 300
    },
    {
      "epoch": 0.33022762118174315,
      "grad_norm": 0.6124400496482849,
      "learning_rate": 1.4633123689727464e-05,
      "loss": 0.6612,
      "step": 350
    },
    {
      "epoch": 0.3774029956362779,
      "grad_norm": 0.5999819040298462,
      "learning_rate": 1.6729559748427675e-05,
      "loss": 0.6587,
      "step": 400
    },
    {
      "epoch": 0.4245783700908126,
      "grad_norm": 0.6131274104118347,
      "learning_rate": 1.8825995807127882e-05,
      "loss": 0.6613,
      "step": 450
    },
    {
      "epoch": 0.47175374454534735,
      "grad_norm": 0.5893041491508484,
      "learning_rate": 1.9996731117142877e-05,
      "loss": 0.6615,
      "step": 500
    },
    {
      "epoch": 0.518929118999882,
      "grad_norm": 0.546073853969574,
      "learning_rate": 1.996500635384337e-05,
      "loss": 0.6598,
      "step": 550
    },
    {
      "epoch": 0.5661044934544168,
      "grad_norm": 0.5713281035423279,
      "learning_rate": 1.9899637947477248e-05,
      "loss": 0.6586,
      "step": 600
    },
    {
      "epoch": 0.6132798679089515,
      "grad_norm": 0.5555443167686462,
      "learning_rate": 1.9800846593471427e-05,
      "loss": 0.6602,
      "step": 650
    },
    {
      "epoch": 0.6604552423634863,
      "grad_norm": 0.5415436625480652,
      "learning_rate": 1.966896582909968e-05,
      "loss": 0.6577,
      "step": 700
    },
    {
      "epoch": 0.707630616818021,
      "grad_norm": 0.5836868286132812,
      "learning_rate": 1.9504440907401113e-05,
      "loss": 0.6544,
      "step": 750
    },
    {
      "epoch": 0.7548059912725558,
      "grad_norm": 0.539230465888977,
      "learning_rate": 1.9307827293926344e-05,
      "loss": 0.6577,
      "step": 800
    },
    {
      "epoch": 0.8019813657270904,
      "grad_norm": 0.5532299280166626,
      "learning_rate": 1.9079788791386468e-05,
      "loss": 0.6569,
      "step": 850
    },
    {
      "epoch": 0.8491567401816252,
      "grad_norm": 0.5102491974830627,
      "learning_rate": 1.8821095298536435e-05,
      "loss": 0.6502,
      "step": 900
    },
    {
      "epoch": 0.8963321146361599,
      "grad_norm": 0.5178349614143372,
      "learning_rate": 1.853262021085921e-05,
      "loss": 0.6559,
      "step": 950
    },
    {
      "epoch": 0.9435074890906947,
      "grad_norm": 0.5248669385910034,
      "learning_rate": 1.821533747182645e-05,
      "loss": 0.6549,
      "step": 1000
    },
    {
      "epoch": 0.9435074890906947,
      "eval_loss": 0.6482675671577454,
      "eval_runtime": 113.518,
      "eval_samples_per_second": 66.395,
      "eval_steps_per_second": 2.079,
      "step": 1000
    },
    {
      "epoch": 0.9906828635452294,
      "grad_norm": 0.5705227255821228,
      "learning_rate": 1.787031828469124e-05,
      "loss": 0.6477,
      "step": 1050
    },
    {
      "epoch": 1.0377402995636278,
      "grad_norm": 0.5323419570922852,
      "learning_rate": 1.7498727495914378e-05,
      "loss": 0.5975,
      "step": 1100
    },
    {
      "epoch": 1.0849156740181625,
      "grad_norm": 0.5340594053268433,
      "learning_rate": 1.710181966243447e-05,
      "loss": 0.5819,
      "step": 1150
    },
    {
      "epoch": 1.1320910484726974,
      "grad_norm": 0.49585267901420593,
      "learning_rate": 1.6680934816059403e-05,
      "loss": 0.5819,
      "step": 1200
    },
    {
      "epoch": 1.179266422927232,
      "grad_norm": 0.500704824924469,
      "learning_rate": 1.623749393927938e-05,
      "loss": 0.5853,
      "step": 1250
    },
    {
      "epoch": 1.2264417973817667,
      "grad_norm": 0.49850329756736755,
      "learning_rate": 1.5772994167775986e-05,
      "loss": 0.5844,
      "step": 1300
    },
    {
      "epoch": 1.2736171718363014,
      "grad_norm": 0.5016334652900696,
      "learning_rate": 1.5289003735824454e-05,
      "loss": 0.5866,
      "step": 1350
    },
    {
      "epoch": 1.320792546290836,
      "grad_norm": 0.5468031764030457,
      "learning_rate": 1.4787156681654358e-05,
      "loss": 0.5819,
      "step": 1400
    },
    {
      "epoch": 1.367967920745371,
      "grad_norm": 0.5233626961708069,
      "learning_rate": 1.426914733064444e-05,
      "loss": 0.5849,
      "step": 1450
    },
    {
      "epoch": 1.4151432951999057,
      "grad_norm": 0.5027523040771484,
      "learning_rate": 1.373672457497717e-05,
      "loss": 0.5844,
      "step": 1500
    },
    {
      "epoch": 1.4623186696544404,
      "grad_norm": 0.5051872730255127,
      "learning_rate": 1.3191685969066082e-05,
      "loss": 0.5846,
      "step": 1550
    },
    {
      "epoch": 1.5094940441089753,
      "grad_norm": 0.5215147733688354,
      "learning_rate": 1.2635871660690677e-05,
      "loss": 0.5829,
      "step": 1600
    },
    {
      "epoch": 1.55666941856351,
      "grad_norm": 0.5042491555213928,
      "learning_rate": 1.2071158178328547e-05,
      "loss": 0.5854,
      "step": 1650
    },
    {
      "epoch": 1.6038447930180446,
      "grad_norm": 0.5223520994186401,
      "learning_rate": 1.1499452095659713e-05,
      "loss": 0.5833,
      "step": 1700
    },
    {
      "epoch": 1.6510201674725793,
      "grad_norm": 0.4980650246143341,
      "learning_rate": 1.092268359463302e-05,
      "loss": 0.5808,
      "step": 1750
    },
    {
      "epoch": 1.698195541927114,
      "grad_norm": 0.5121437907218933,
      "learning_rate": 1.0342799948826788e-05,
      "loss": 0.5823,
      "step": 1800
    },
    {
      "epoch": 1.7453709163816487,
      "grad_norm": 0.4902792274951935,
      "learning_rate": 9.761758949105056e-06,
      "loss": 0.5824,
      "step": 1850
    },
    {
      "epoch": 1.7925462908361836,
      "grad_norm": 0.4946081042289734,
      "learning_rate": 9.18152229376561e-06,
      "loss": 0.5814,
      "step": 1900
    },
    {
      "epoch": 1.8397216652907182,
      "grad_norm": 0.5029374361038208,
      "learning_rate": 8.604048965495786e-06,
      "loss": 0.5795,
      "step": 1950
    },
    {
      "epoch": 1.8868970397452531,
      "grad_norm": 0.47936609387397766,
      "learning_rate": 8.031288617496686e-06,
      "loss": 0.5796,
      "step": 2000
    },
    {
      "epoch": 1.8868970397452531,
      "eval_loss": 0.6402788162231445,
      "eval_runtime": 113.5077,
      "eval_samples_per_second": 66.401,
      "eval_steps_per_second": 2.079,
      "step": 2000
    },
    {
      "epoch": 1.9340724141997878,
      "grad_norm": 0.5165300369262695,
      "learning_rate": 7.465174991105405e-06,
      "loss": 0.5762,
      "step": 2050
    },
    {
      "epoch": 1.9812477886543225,
      "grad_norm": 0.49469050765037537,
      "learning_rate": 6.90761938713854e-06,
      "loss": 0.5747,
      "step": 2100
    },
    {
      "epoch": 2.028305224672721,
      "grad_norm": 0.5297324061393738,
      "learning_rate": 6.360504212998903e-06,
      "loss": 0.5171,
      "step": 2150
    },
    {
      "epoch": 2.0754805991272556,
      "grad_norm": 0.5692960023880005,
      "learning_rate": 5.825676627331614e-06,
      "loss": 0.471,
      "step": 2200
    },
    {
      "epoch": 2.1226559735817903,
      "grad_norm": 0.5705838799476624,
      "learning_rate": 5.304942303686238e-06,
      "loss": 0.4737,
      "step": 2250
    },
    {
      "epoch": 2.169831348036325,
      "grad_norm": 0.5800350308418274,
      "learning_rate": 4.800059334240049e-06,
      "loss": 0.4734,
      "step": 2300
    },
    {
      "epoch": 2.2170067224908596,
      "grad_norm": 0.5653440952301025,
      "learning_rate": 4.3127322941645385e-06,
      "loss": 0.4713,
      "step": 2350
    },
    {
      "epoch": 2.2641820969453947,
      "grad_norm": 0.5822903513908386,
      "learning_rate": 3.844606486674862e-06,
      "loss": 0.4715,
      "step": 2400
    },
    {
      "epoch": 2.3113574713999294,
      "grad_norm": 0.599399209022522,
      "learning_rate": 3.3972623881920296e-06,
      "loss": 0.47,
      "step": 2450
    },
    {
      "epoch": 2.358532845854464,
      "grad_norm": 0.5810734033584595,
      "learning_rate": 2.9722103123719324e-06,
      "loss": 0.4661,
      "step": 2500
    },
    {
      "epoch": 2.405708220308999,
      "grad_norm": 0.5994310975074768,
      "learning_rate": 2.5708853110164346e-06,
      "loss": 0.4686,
      "step": 2550
    },
    {
      "epoch": 2.4528835947635335,
      "grad_norm": 0.577089786529541,
      "learning_rate": 2.194642329081902e-06,
      "loss": 0.4673,
      "step": 2600
    },
    {
      "epoch": 2.500058969218068,
      "grad_norm": 0.578800618648529,
      "learning_rate": 1.844751630142797e-06,
      "loss": 0.4664,
      "step": 2650
    },
    {
      "epoch": 2.547234343672603,
      "grad_norm": 0.5981403589248657,
      "learning_rate": 1.5223945077547253e-06,
      "loss": 0.4686,
      "step": 2700
    },
    {
      "epoch": 2.5944097181271375,
      "grad_norm": 0.6022927165031433,
      "learning_rate": 1.2286592971962152e-06,
      "loss": 0.4676,
      "step": 2750
    },
    {
      "epoch": 2.641585092581672,
      "grad_norm": 0.5744144916534424,
      "learning_rate": 9.645377010542212e-07,
      "loss": 0.4684,
      "step": 2800
    },
    {
      "epoch": 2.6887604670362073,
      "grad_norm": 0.5747597813606262,
      "learning_rate": 7.309214410588927e-07,
      "loss": 0.4632,
      "step": 2850
    },
    {
      "epoch": 2.735935841490742,
      "grad_norm": 0.5752475261688232,
      "learning_rate": 5.285992474715796e-07,
      "loss": 0.4643,
      "step": 2900
    },
    {
      "epoch": 2.7831112159452767,
      "grad_norm": 0.5861772894859314,
      "learning_rate": 3.5825419619046176e-07,
      "loss": 0.465,
      "step": 2950
    },
    {
      "epoch": 2.8302865903998113,
      "grad_norm": 0.5940195918083191,
      "learning_rate": 2.2046140256418713e-07,
      "loss": 0.4646,
      "step": 3000
    },
    {
      "epoch": 2.8302865903998113,
      "eval_loss": 0.6850141286849976,
      "eval_runtime": 113.5772,
      "eval_samples_per_second": 66.36,
      "eval_steps_per_second": 2.078,
      "step": 3000
    },
    {
      "epoch": 2.877461964854346,
      "grad_norm": 0.5916756987571716,
      "learning_rate": 1.1568607969963175e-07,
      "loss": 0.4644,
      "step": 3050
    },
    {
      "epoch": 2.9246373393088807,
      "grad_norm": 0.5982924103736877,
      "learning_rate": 4.42819678192774e-08,
      "loss": 0.4691,
      "step": 3100
    },
    {
      "epoch": 2.9718127137634154,
      "grad_norm": 0.5770459175109863,
      "learning_rate": 6.490139970976029e-09,
      "loss": 0.4661,
      "step": 3150
    },
    {
      "epoch": 3.0,
      "step": 3180,
      "total_flos": 1.7575386520355865e+19,
      "train_loss": 0.5721450322828953,
      "train_runtime": 14690.2311,
      "train_samples_per_second": 13.852,
      "train_steps_per_second": 0.216
    }
  ],
  "logging_steps": 50,
  "max_steps": 3180,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.7575386520355865e+19,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
|
|