{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.3529411764705883,
  "eval_steps": 10,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11764705882352941,
      "grad_norm": 169.93887329101562,
      "learning_rate": 6e-06,
      "loss": 4.8387,
      "step": 10
    },
    {
      "epoch": 0.11764705882352941,
      "eval_loss": 4.6919026374816895,
      "eval_runtime": 15.5369,
      "eval_samples_per_second": 350.456,
      "eval_steps_per_second": 5.535,
      "step": 10
    },
    {
      "epoch": 0.23529411764705882,
      "grad_norm": 103.59686279296875,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 4.3444,
      "step": 20
    },
    {
      "epoch": 0.23529411764705882,
      "eval_loss": 3.9225239753723145,
      "eval_runtime": 16.2767,
      "eval_samples_per_second": 334.526,
      "eval_steps_per_second": 5.284,
      "step": 20
    },
    {
      "epoch": 0.35294117647058826,
      "grad_norm": 96.45588684082031,
      "learning_rate": 1.9333333333333333e-05,
      "loss": 3.5537,
      "step": 30
    },
    {
      "epoch": 0.35294117647058826,
      "eval_loss": 3.1923811435699463,
      "eval_runtime": 17.7651,
      "eval_samples_per_second": 306.5,
      "eval_steps_per_second": 4.841,
      "step": 30
    },
    {
      "epoch": 0.47058823529411764,
      "grad_norm": 116.93628692626953,
      "learning_rate": 1.8222222222222224e-05,
      "loss": 2.4677,
      "step": 40
    },
    {
      "epoch": 0.47058823529411764,
      "eval_loss": 2.3630847930908203,
      "eval_runtime": 19.3532,
      "eval_samples_per_second": 281.349,
      "eval_steps_per_second": 4.444,
      "step": 40
    },
    {
      "epoch": 0.5882352941176471,
      "grad_norm": 123.0512924194336,
      "learning_rate": 1.7111111111111112e-05,
      "loss": 1.5596,
      "step": 50
    },
    {
      "epoch": 0.5882352941176471,
      "eval_loss": 1.779540777206421,
      "eval_runtime": 17.9711,
      "eval_samples_per_second": 302.987,
      "eval_steps_per_second": 4.785,
      "step": 50
    },
    {
      "epoch": 0.7058823529411765,
      "grad_norm": 116.81210327148438,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.9314,
      "step": 60
    },
    {
      "epoch": 0.7058823529411765,
      "eval_loss": 1.3096877336502075,
      "eval_runtime": 17.7299,
      "eval_samples_per_second": 307.109,
      "eval_steps_per_second": 4.851,
      "step": 60
    },
    {
      "epoch": 0.8235294117647058,
      "grad_norm": 78.21733093261719,
      "learning_rate": 1.488888888888889e-05,
      "loss": 0.4562,
      "step": 70
    },
    {
      "epoch": 0.8235294117647058,
      "eval_loss": 1.0128556489944458,
      "eval_runtime": 18.4122,
      "eval_samples_per_second": 295.729,
      "eval_steps_per_second": 4.671,
      "step": 70
    },
    {
      "epoch": 0.9411764705882353,
      "grad_norm": 65.50968933105469,
      "learning_rate": 1.377777777777778e-05,
      "loss": 0.3587,
      "step": 80
    },
    {
      "epoch": 0.9411764705882353,
      "eval_loss": 0.9688291549682617,
      "eval_runtime": 18.5333,
      "eval_samples_per_second": 293.795,
      "eval_steps_per_second": 4.64,
      "step": 80
    },
    {
      "epoch": 1.0588235294117647,
      "grad_norm": 61.12400817871094,
      "learning_rate": 1.2666666666666667e-05,
      "loss": 0.3667,
      "step": 90
    },
    {
      "epoch": 1.0588235294117647,
      "eval_loss": 0.9096461534500122,
      "eval_runtime": 18.1026,
      "eval_samples_per_second": 300.785,
      "eval_steps_per_second": 4.751,
      "step": 90
    },
    {
      "epoch": 1.1764705882352942,
      "grad_norm": 50.41886901855469,
      "learning_rate": 1.1555555555555556e-05,
      "loss": 0.3104,
      "step": 100
    },
    {
      "epoch": 1.1764705882352942,
      "eval_loss": 0.9075976610183716,
      "eval_runtime": 18.2417,
      "eval_samples_per_second": 298.492,
      "eval_steps_per_second": 4.714,
      "step": 100
    },
    {
      "epoch": 1.2941176470588236,
      "grad_norm": 49.723411560058594,
      "learning_rate": 1.0444444444444445e-05,
      "loss": 0.3057,
      "step": 110
    },
    {
      "epoch": 1.2941176470588236,
      "eval_loss": 0.8963654637336731,
      "eval_runtime": 18.3711,
      "eval_samples_per_second": 296.389,
      "eval_steps_per_second": 4.681,
      "step": 110
    },
    {
      "epoch": 1.4117647058823528,
      "grad_norm": 66.96435546875,
      "learning_rate": 9.333333333333334e-06,
      "loss": 0.2852,
      "step": 120
    },
    {
      "epoch": 1.4117647058823528,
      "eval_loss": 0.8938003778457642,
      "eval_runtime": 18.4942,
      "eval_samples_per_second": 294.417,
      "eval_steps_per_second": 4.65,
      "step": 120
    },
    {
      "epoch": 1.5294117647058822,
      "grad_norm": 67.33085632324219,
      "learning_rate": 8.222222222222222e-06,
      "loss": 0.2527,
      "step": 130
    },
    {
      "epoch": 1.5294117647058822,
      "eval_loss": 0.9134606122970581,
      "eval_runtime": 18.4294,
      "eval_samples_per_second": 295.452,
      "eval_steps_per_second": 4.666,
      "step": 130
    },
    {
      "epoch": 1.6470588235294117,
      "grad_norm": 60.442684173583984,
      "learning_rate": 7.111111111111112e-06,
      "loss": 0.3592,
      "step": 140
    },
    {
      "epoch": 1.6470588235294117,
      "eval_loss": 0.8834967017173767,
      "eval_runtime": 18.3629,
      "eval_samples_per_second": 296.522,
      "eval_steps_per_second": 4.683,
      "step": 140
    },
    {
      "epoch": 1.7647058823529411,
      "grad_norm": 59.17316818237305,
      "learning_rate": 6e-06,
      "loss": 0.1998,
      "step": 150
    },
    {
      "epoch": 1.7647058823529411,
      "eval_loss": 0.862636923789978,
      "eval_runtime": 18.2959,
      "eval_samples_per_second": 297.608,
      "eval_steps_per_second": 4.701,
      "step": 150
    },
    {
      "epoch": 1.8823529411764706,
      "grad_norm": 33.52590560913086,
      "learning_rate": 4.888888888888889e-06,
      "loss": 0.2258,
      "step": 160
    },
    {
      "epoch": 1.8823529411764706,
      "eval_loss": 0.8706350326538086,
      "eval_runtime": 18.3223,
      "eval_samples_per_second": 297.179,
      "eval_steps_per_second": 4.694,
      "step": 160
    },
    {
      "epoch": 2.0,
      "grad_norm": 48.038719177246094,
      "learning_rate": 3.777777777777778e-06,
      "loss": 0.1933,
      "step": 170
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.875869870185852,
      "eval_runtime": 18.2474,
      "eval_samples_per_second": 298.399,
      "eval_steps_per_second": 4.713,
      "step": 170
    },
    {
      "epoch": 2.1176470588235294,
      "grad_norm": 42.39509201049805,
      "learning_rate": 2.666666666666667e-06,
      "loss": 0.1987,
      "step": 180
    },
    {
      "epoch": 2.1176470588235294,
      "eval_loss": 0.875755250453949,
      "eval_runtime": 18.2272,
      "eval_samples_per_second": 298.729,
      "eval_steps_per_second": 4.718,
      "step": 180
    },
    {
      "epoch": 2.235294117647059,
      "grad_norm": 24.651744842529297,
      "learning_rate": 1.5555555555555558e-06,
      "loss": 0.1684,
      "step": 190
    },
    {
      "epoch": 2.235294117647059,
      "eval_loss": 0.8625762462615967,
      "eval_runtime": 18.1576,
      "eval_samples_per_second": 299.874,
      "eval_steps_per_second": 4.736,
      "step": 190
    },
    {
      "epoch": 2.3529411764705883,
      "grad_norm": 27.64773941040039,
      "learning_rate": 4.444444444444445e-07,
      "loss": 0.1612,
      "step": 200
    },
    {
      "epoch": 2.3529411764705883,
      "eval_loss": 0.8577004075050354,
      "eval_runtime": 18.1098,
      "eval_samples_per_second": 300.666,
      "eval_steps_per_second": 4.749,
      "step": 200
    }
  ],
  "logging_steps": 10,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 2500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}