{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.761904761904762,
  "eval_steps": 10,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.23809523809523808,
      "grad_norm": 107.2943344116211,
      "learning_rate": 7e-06,
      "loss": 4.5568,
      "step": 10
    },
    {
      "epoch": 0.23809523809523808,
      "eval_loss": 4.329397678375244,
      "eval_runtime": 2.6051,
      "eval_samples_per_second": 982.322,
      "eval_steps_per_second": 15.355,
      "step": 10
    },
    {
      "epoch": 0.47619047619047616,
      "grad_norm": 82.67948150634766,
      "learning_rate": 1.7e-05,
      "loss": 3.937,
      "step": 20
    },
    {
      "epoch": 0.47619047619047616,
      "eval_loss": 3.166707754135132,
      "eval_runtime": 2.6689,
      "eval_samples_per_second": 958.832,
      "eval_steps_per_second": 14.988,
      "step": 20
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 81.49203491210938,
      "learning_rate": 1.9222222222222225e-05,
      "loss": 2.7135,
      "step": 30
    },
    {
      "epoch": 0.7142857142857143,
      "eval_loss": 1.7821017503738403,
      "eval_runtime": 3.0515,
      "eval_samples_per_second": 838.597,
      "eval_steps_per_second": 13.108,
      "step": 30
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 67.98638153076172,
      "learning_rate": 1.8111111111111112e-05,
      "loss": 1.6075,
      "step": 40
    },
    {
      "epoch": 0.9523809523809523,
      "eval_loss": 0.9218789339065552,
      "eval_runtime": 2.6581,
      "eval_samples_per_second": 962.702,
      "eval_steps_per_second": 15.048,
      "step": 40
    },
    {
      "epoch": 1.1904761904761905,
      "grad_norm": 41.94712448120117,
      "learning_rate": 1.7e-05,
      "loss": 0.7666,
      "step": 50
    },
    {
      "epoch": 1.1904761904761905,
      "eval_loss": 0.5395554304122925,
      "eval_runtime": 2.7233,
      "eval_samples_per_second": 939.664,
      "eval_steps_per_second": 14.688,
      "step": 50
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 45.76442337036133,
      "learning_rate": 1.588888888888889e-05,
      "loss": 0.4077,
      "step": 60
    },
    {
      "epoch": 1.4285714285714286,
      "eval_loss": 0.395812064409256,
      "eval_runtime": 2.6568,
      "eval_samples_per_second": 963.196,
      "eval_steps_per_second": 15.056,
      "step": 60
    },
    {
      "epoch": 1.6666666666666665,
      "grad_norm": 36.85358428955078,
      "learning_rate": 1.477777777777778e-05,
      "loss": 0.2843,
      "step": 70
    },
    {
      "epoch": 1.6666666666666665,
      "eval_loss": 0.3538413345813751,
      "eval_runtime": 2.958,
      "eval_samples_per_second": 865.099,
      "eval_steps_per_second": 13.522,
      "step": 70
    },
    {
      "epoch": 1.9047619047619047,
      "grad_norm": 17.167036056518555,
      "learning_rate": 1.3666666666666667e-05,
      "loss": 0.2165,
      "step": 80
    },
    {
      "epoch": 1.9047619047619047,
      "eval_loss": 0.3396788537502289,
      "eval_runtime": 3.0374,
      "eval_samples_per_second": 842.51,
      "eval_steps_per_second": 13.169,
      "step": 80
    },
    {
      "epoch": 2.142857142857143,
      "grad_norm": 28.207691192626953,
      "learning_rate": 1.2555555555555557e-05,
      "loss": 0.1979,
      "step": 90
    },
    {
      "epoch": 2.142857142857143,
      "eval_loss": 0.32209891080856323,
      "eval_runtime": 2.6763,
      "eval_samples_per_second": 956.167,
      "eval_steps_per_second": 14.946,
      "step": 90
    },
    {
      "epoch": 2.380952380952381,
      "grad_norm": 20.979476928710938,
      "learning_rate": 1.1444444444444444e-05,
      "loss": 0.1476,
      "step": 100
    },
    {
      "epoch": 2.380952380952381,
      "eval_loss": 0.30111950635910034,
      "eval_runtime": 2.7597,
      "eval_samples_per_second": 927.283,
      "eval_steps_per_second": 14.494,
      "step": 100
    },
    {
      "epoch": 2.619047619047619,
      "grad_norm": 14.82142448425293,
      "learning_rate": 1.0333333333333335e-05,
      "loss": 0.1061,
      "step": 110
    },
    {
      "epoch": 2.619047619047619,
      "eval_loss": 0.30764198303222656,
      "eval_runtime": 2.701,
      "eval_samples_per_second": 947.422,
      "eval_steps_per_second": 14.809,
      "step": 110
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 19.38449478149414,
      "learning_rate": 9.222222222222224e-06,
      "loss": 0.0907,
      "step": 120
    },
    {
      "epoch": 2.857142857142857,
      "eval_loss": 0.31123870611190796,
      "eval_runtime": 2.7275,
      "eval_samples_per_second": 938.22,
      "eval_steps_per_second": 14.665,
      "step": 120
    },
    {
      "epoch": 3.0952380952380953,
      "grad_norm": 15.523360252380371,
      "learning_rate": 8.111111111111112e-06,
      "loss": 0.0573,
      "step": 130
    },
    {
      "epoch": 3.0952380952380953,
      "eval_loss": 0.306456595659256,
      "eval_runtime": 3.037,
      "eval_samples_per_second": 842.596,
      "eval_steps_per_second": 13.171,
      "step": 130
    },
    {
      "epoch": 3.3333333333333335,
      "grad_norm": 9.912215232849121,
      "learning_rate": 7e-06,
      "loss": 0.0638,
      "step": 140
    },
    {
      "epoch": 3.3333333333333335,
      "eval_loss": 0.3030768036842346,
      "eval_runtime": 2.784,
      "eval_samples_per_second": 919.181,
      "eval_steps_per_second": 14.368,
      "step": 140
    },
    {
      "epoch": 3.571428571428571,
      "grad_norm": 32.101985931396484,
      "learning_rate": 5.88888888888889e-06,
      "loss": 0.0716,
      "step": 150
    },
    {
      "epoch": 3.571428571428571,
      "eval_loss": 0.2990337312221527,
      "eval_runtime": 2.8128,
      "eval_samples_per_second": 909.761,
      "eval_steps_per_second": 14.221,
      "step": 150
    },
    {
      "epoch": 3.8095238095238093,
      "grad_norm": 17.976055145263672,
      "learning_rate": 4.777777777777778e-06,
      "loss": 0.0841,
      "step": 160
    },
    {
      "epoch": 3.8095238095238093,
      "eval_loss": 0.3003748655319214,
      "eval_runtime": 2.7098,
      "eval_samples_per_second": 944.346,
      "eval_steps_per_second": 14.761,
      "step": 160
    },
    {
      "epoch": 4.0476190476190474,
      "grad_norm": 16.045225143432617,
      "learning_rate": 3.6666666666666666e-06,
      "loss": 0.096,
      "step": 170
    },
    {
      "epoch": 4.0476190476190474,
      "eval_loss": 0.3065829873085022,
      "eval_runtime": 2.7428,
      "eval_samples_per_second": 933.004,
      "eval_steps_per_second": 14.584,
      "step": 170
    },
    {
      "epoch": 4.285714285714286,
      "grad_norm": 24.23316764831543,
      "learning_rate": 2.5555555555555557e-06,
      "loss": 0.0728,
      "step": 180
    },
    {
      "epoch": 4.285714285714286,
      "eval_loss": 0.3128698170185089,
      "eval_runtime": 3.0252,
      "eval_samples_per_second": 845.905,
      "eval_steps_per_second": 13.222,
      "step": 180
    },
    {
      "epoch": 4.523809523809524,
      "grad_norm": 10.853073120117188,
      "learning_rate": 1.4444444444444445e-06,
      "loss": 0.0594,
      "step": 190
    },
    {
      "epoch": 4.523809523809524,
      "eval_loss": 0.3163248300552368,
      "eval_runtime": 2.763,
      "eval_samples_per_second": 926.153,
      "eval_steps_per_second": 14.477,
      "step": 190
    },
    {
      "epoch": 4.761904761904762,
      "grad_norm": 18.616168975830078,
      "learning_rate": 3.3333333333333335e-07,
      "loss": 0.071,
      "step": 200
    },
    {
      "epoch": 4.761904761904762,
      "eval_loss": 0.3181891143321991,
      "eval_runtime": 3.0801,
      "eval_samples_per_second": 830.819,
      "eval_steps_per_second": 12.987,
      "step": 200
    }
  ],
  "logging_steps": 10,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 2500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}