| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.3529411764705883, | |
| "eval_steps": 10, | |
| "global_step": 200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.11764705882352941, | |
| "grad_norm": 99.34697723388672, | |
| "learning_rate": 7e-06, | |
| "loss": 4.6344, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.11764705882352941, | |
| "eval_loss": 4.356701850891113, | |
| "eval_runtime": 7.4823, | |
| "eval_samples_per_second": 727.722, | |
| "eval_steps_per_second": 11.494, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.23529411764705882, | |
| "grad_norm": 68.6648941040039, | |
| "learning_rate": 1.7e-05, | |
| "loss": 4.1147, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.23529411764705882, | |
| "eval_loss": 3.802903652191162, | |
| "eval_runtime": 7.4967, | |
| "eval_samples_per_second": 726.322, | |
| "eval_steps_per_second": 11.472, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.35294117647058826, | |
| "grad_norm": 73.34857940673828, | |
| "learning_rate": 1.9222222222222225e-05, | |
| "loss": 3.3201, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.35294117647058826, | |
| "eval_loss": 2.9514710903167725, | |
| "eval_runtime": 7.5247, | |
| "eval_samples_per_second": 723.619, | |
| "eval_steps_per_second": 11.429, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": 61.95991134643555, | |
| "learning_rate": 1.8111111111111112e-05, | |
| "loss": 2.1842, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "eval_loss": 2.131948947906494, | |
| "eval_runtime": 7.7586, | |
| "eval_samples_per_second": 701.798, | |
| "eval_steps_per_second": 11.084, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 48.887203216552734, | |
| "learning_rate": 1.7e-05, | |
| "loss": 1.2624, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "eval_loss": 1.3057191371917725, | |
| "eval_runtime": 7.9165, | |
| "eval_samples_per_second": 687.801, | |
| "eval_steps_per_second": 10.863, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": 35.66941833496094, | |
| "learning_rate": 1.588888888888889e-05, | |
| "loss": 0.6907, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "eval_loss": 0.9652644991874695, | |
| "eval_runtime": 7.9447, | |
| "eval_samples_per_second": 685.363, | |
| "eval_steps_per_second": 10.825, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.8235294117647058, | |
| "grad_norm": 29.34256935119629, | |
| "learning_rate": 1.477777777777778e-05, | |
| "loss": 0.3527, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.8235294117647058, | |
| "eval_loss": 0.8976885080337524, | |
| "eval_runtime": 8.0315, | |
| "eval_samples_per_second": 677.953, | |
| "eval_steps_per_second": 10.708, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 32.28174591064453, | |
| "learning_rate": 1.3666666666666667e-05, | |
| "loss": 0.2828, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "eval_loss": 0.8685462474822998, | |
| "eval_runtime": 8.14, | |
| "eval_samples_per_second": 668.919, | |
| "eval_steps_per_second": 10.565, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.0588235294117647, | |
| "grad_norm": 38.87677764892578, | |
| "learning_rate": 1.2555555555555557e-05, | |
| "loss": 0.2758, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.0588235294117647, | |
| "eval_loss": 0.8556678295135498, | |
| "eval_runtime": 8.2601, | |
| "eval_samples_per_second": 659.194, | |
| "eval_steps_per_second": 10.412, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.1764705882352942, | |
| "grad_norm": 33.91325378417969, | |
| "learning_rate": 1.1444444444444444e-05, | |
| "loss": 0.2576, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.1764705882352942, | |
| "eval_loss": 0.8554092049598694, | |
| "eval_runtime": 8.4912, | |
| "eval_samples_per_second": 641.252, | |
| "eval_steps_per_second": 10.128, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.2941176470588236, | |
| "grad_norm": 27.338443756103516, | |
| "learning_rate": 1.0333333333333335e-05, | |
| "loss": 0.2877, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.2941176470588236, | |
| "eval_loss": 0.8585284948348999, | |
| "eval_runtime": 8.7035, | |
| "eval_samples_per_second": 625.611, | |
| "eval_steps_per_second": 9.881, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.4117647058823528, | |
| "grad_norm": 33.70604705810547, | |
| "learning_rate": 9.222222222222224e-06, | |
| "loss": 0.2109, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.4117647058823528, | |
| "eval_loss": 0.8407207727432251, | |
| "eval_runtime": 8.9789, | |
| "eval_samples_per_second": 606.424, | |
| "eval_steps_per_second": 9.578, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.5294117647058822, | |
| "grad_norm": 38.09708786010742, | |
| "learning_rate": 8.111111111111112e-06, | |
| "loss": 0.206, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.5294117647058822, | |
| "eval_loss": 0.8386306166648865, | |
| "eval_runtime": 9.2342, | |
| "eval_samples_per_second": 589.658, | |
| "eval_steps_per_second": 9.313, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.6470588235294117, | |
| "grad_norm": 47.18006896972656, | |
| "learning_rate": 7e-06, | |
| "loss": 0.2828, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.6470588235294117, | |
| "eval_loss": 0.8333184719085693, | |
| "eval_runtime": 9.1122, | |
| "eval_samples_per_second": 597.55, | |
| "eval_steps_per_second": 9.438, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.7647058823529411, | |
| "grad_norm": 24.672765731811523, | |
| "learning_rate": 5.88888888888889e-06, | |
| "loss": 0.2045, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.7647058823529411, | |
| "eval_loss": 0.8294004201889038, | |
| "eval_runtime": 8.9028, | |
| "eval_samples_per_second": 611.608, | |
| "eval_steps_per_second": 9.66, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.8823529411764706, | |
| "grad_norm": 21.150001525878906, | |
| "learning_rate": 4.777777777777778e-06, | |
| "loss": 0.1932, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.8823529411764706, | |
| "eval_loss": 0.828009843826294, | |
| "eval_runtime": 8.7354, | |
| "eval_samples_per_second": 623.324, | |
| "eval_steps_per_second": 9.845, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 55.02903366088867, | |
| "learning_rate": 3.6666666666666666e-06, | |
| "loss": 0.1763, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.824001133441925, | |
| "eval_runtime": 8.6395, | |
| "eval_samples_per_second": 630.244, | |
| "eval_steps_per_second": 9.954, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 2.1176470588235294, | |
| "grad_norm": 15.967198371887207, | |
| "learning_rate": 2.5555555555555557e-06, | |
| "loss": 0.1603, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.1176470588235294, | |
| "eval_loss": 0.8176314830780029, | |
| "eval_runtime": 8.7379, | |
| "eval_samples_per_second": 623.15, | |
| "eval_steps_per_second": 9.842, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.235294117647059, | |
| "grad_norm": 9.89770221710205, | |
| "learning_rate": 1.4444444444444445e-06, | |
| "loss": 0.1445, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.235294117647059, | |
| "eval_loss": 0.8145530223846436, | |
| "eval_runtime": 8.8692, | |
| "eval_samples_per_second": 613.923, | |
| "eval_steps_per_second": 9.696, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.3529411764705883, | |
| "grad_norm": 31.038183212280273, | |
| "learning_rate": 3.3333333333333335e-07, | |
| "loss": 0.1534, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.3529411764705883, | |
| "eval_loss": 0.8133436441421509, | |
| "eval_runtime": 9.0044, | |
| "eval_samples_per_second": 604.702, | |
| "eval_steps_per_second": 9.551, | |
| "step": 200 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 2500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |