| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9869281045751634, |
| "eval_steps": 500, |
| "global_step": 228, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.13071895424836602, |
| "grad_norm": 13.640703803287584, |
| "learning_rate": 5e-06, |
| "loss": 0.9844, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.26143790849673204, |
| "grad_norm": 0.9447823765191696, |
| "learning_rate": 5e-06, |
| "loss": 0.8863, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.39215686274509803, |
| "grad_norm": 1.038467083575288, |
| "learning_rate": 5e-06, |
| "loss": 0.8449, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.5228758169934641, |
| "grad_norm": 1.6427820915875102, |
| "learning_rate": 5e-06, |
| "loss": 0.8257, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.6535947712418301, |
| "grad_norm": 1.7936488531165335, |
| "learning_rate": 5e-06, |
| "loss": 0.8169, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.7843137254901961, |
| "grad_norm": 0.7697816321414731, |
| "learning_rate": 5e-06, |
| "loss": 0.8056, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.9150326797385621, |
| "grad_norm": 1.250994290613137, |
| "learning_rate": 5e-06, |
| "loss": 0.7971, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.0490196078431373, |
| "grad_norm": 0.843433753244107, |
| "learning_rate": 5e-06, |
| "loss": 0.8367, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.1797385620915033, |
| "grad_norm": 0.6027656670101825, |
| "learning_rate": 5e-06, |
| "loss": 0.7483, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.3104575163398693, |
| "grad_norm": 0.8024576817469242, |
| "learning_rate": 5e-06, |
| "loss": 0.7428, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.4411764705882353, |
| "grad_norm": 0.7594730875779195, |
| "learning_rate": 5e-06, |
| "loss": 0.7401, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.5718954248366013, |
| "grad_norm": 0.5293998500966177, |
| "learning_rate": 5e-06, |
| "loss": 0.7426, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.7026143790849673, |
| "grad_norm": 0.9249042901353932, |
| "learning_rate": 5e-06, |
| "loss": 0.7406, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.8333333333333335, |
| "grad_norm": 0.5767932991870924, |
| "learning_rate": 5e-06, |
| "loss": 0.7379, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.9640522875816995, |
| "grad_norm": 0.5974379920218519, |
| "learning_rate": 5e-06, |
| "loss": 0.7337, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.0980392156862746, |
| "grad_norm": 1.3799060310730653, |
| "learning_rate": 5e-06, |
| "loss": 0.7541, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.2287581699346406, |
| "grad_norm": 0.8440697657265467, |
| "learning_rate": 5e-06, |
| "loss": 0.6849, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.3594771241830066, |
| "grad_norm": 0.6523664577578698, |
| "learning_rate": 5e-06, |
| "loss": 0.6828, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.4901960784313726, |
| "grad_norm": 0.5604368514967889, |
| "learning_rate": 5e-06, |
| "loss": 0.6833, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.6209150326797386, |
| "grad_norm": 0.651015676014187, |
| "learning_rate": 5e-06, |
| "loss": 0.6825, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.7516339869281046, |
| "grad_norm": 0.6331718263692562, |
| "learning_rate": 5e-06, |
| "loss": 0.6826, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.8823529411764706, |
| "grad_norm": 0.6748382635791591, |
| "learning_rate": 5e-06, |
| "loss": 0.6867, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.9869281045751634, |
| "step": 228, |
| "total_flos": 381489732648960.0, |
| "train_loss": 0.7633350188272041, |
| "train_runtime": 3333.6415, |
| "train_samples_per_second": 35.22, |
| "train_steps_per_second": 0.068 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 228, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 381489732648960.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|