| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.963963963963964, |
| "eval_steps": 500, |
| "global_step": 81, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.36036036036036034, |
| "grad_norm": 5.788876433035059, |
| "learning_rate": 5e-06, |
| "loss": 0.3026, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.7207207207207207, |
| "grad_norm": 2.9871413577007604, |
| "learning_rate": 5e-06, |
| "loss": 0.2081, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.972972972972973, |
| "eval_loss": 0.18130172789096832, |
| "eval_runtime": 30.3187, |
| "eval_samples_per_second": 24.638, |
| "eval_steps_per_second": 0.396, |
| "step": 27 |
| }, |
| { |
| "epoch": 1.1036036036036037, |
| "grad_norm": 0.7150273386303733, |
| "learning_rate": 5e-06, |
| "loss": 0.202, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.4639639639639639, |
| "grad_norm": 0.765396666007098, |
| "learning_rate": 5e-06, |
| "loss": 0.1717, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.8243243243243243, |
| "grad_norm": 0.43140896524380545, |
| "learning_rate": 5e-06, |
| "loss": 0.1642, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.9684684684684686, |
| "eval_loss": 0.16076427698135376, |
| "eval_runtime": 30.2833, |
| "eval_samples_per_second": 24.667, |
| "eval_steps_per_second": 0.396, |
| "step": 54 |
| }, |
| { |
| "epoch": 2.2072072072072073, |
| "grad_norm": 0.44793469318434836, |
| "learning_rate": 5e-06, |
| "loss": 0.172, |
| "step": 60 |
| }, |
| { |
| "epoch": 2.5675675675675675, |
| "grad_norm": 0.3516467006088966, |
| "learning_rate": 5e-06, |
| "loss": 0.1532, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.9279279279279278, |
| "grad_norm": 0.49731596311614684, |
| "learning_rate": 5e-06, |
| "loss": 0.1506, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.963963963963964, |
| "eval_loss": 0.15399795770645142, |
| "eval_runtime": 29.6439, |
| "eval_samples_per_second": 25.199, |
| "eval_steps_per_second": 0.405, |
| "step": 81 |
| }, |
| { |
| "epoch": 2.963963963963964, |
| "step": 81, |
| "total_flos": 135468637224960.0, |
| "train_loss": 0.1901383955537537, |
| "train_runtime": 5069.1426, |
| "train_samples_per_second": 8.39, |
| "train_steps_per_second": 0.016 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 81, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 135468637224960.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|