| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.983050847457627, |
| "eval_steps": 500, |
| "global_step": 330, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0903954802259887, |
| "grad_norm": 3.1035618914383156, |
| "learning_rate": 5e-06, |
| "loss": 0.7917, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1807909604519774, |
| "grad_norm": 0.8607691188024904, |
| "learning_rate": 5e-06, |
| "loss": 0.6851, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2711864406779661, |
| "grad_norm": 1.1913942325018136, |
| "learning_rate": 5e-06, |
| "loss": 0.6577, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.3615819209039548, |
| "grad_norm": 0.548623444538096, |
| "learning_rate": 5e-06, |
| "loss": 0.6389, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.4519774011299435, |
| "grad_norm": 0.585455352514535, |
| "learning_rate": 5e-06, |
| "loss": 0.6294, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.5423728813559322, |
| "grad_norm": 0.8347995665944788, |
| "learning_rate": 5e-06, |
| "loss": 0.6195, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.632768361581921, |
| "grad_norm": 0.7995568610573858, |
| "learning_rate": 5e-06, |
| "loss": 0.6108, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.7231638418079096, |
| "grad_norm": 0.4720233842026471, |
| "learning_rate": 5e-06, |
| "loss": 0.5995, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.8135593220338984, |
| "grad_norm": 0.7085911417350107, |
| "learning_rate": 5e-06, |
| "loss": 0.5964, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.903954802259887, |
| "grad_norm": 0.8836062378579512, |
| "learning_rate": 5e-06, |
| "loss": 0.594, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.9943502824858758, |
| "grad_norm": 0.47743399780572227, |
| "learning_rate": 5e-06, |
| "loss": 0.5971, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.9943502824858758, |
| "eval_loss": 0.593043327331543, |
| "eval_runtime": 76.0822, |
| "eval_samples_per_second": 39.181, |
| "eval_steps_per_second": 0.618, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.0847457627118644, |
| "grad_norm": 0.5787192502215555, |
| "learning_rate": 5e-06, |
| "loss": 0.5786, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.1751412429378532, |
| "grad_norm": 0.5658782261763434, |
| "learning_rate": 5e-06, |
| "loss": 0.5597, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.2655367231638417, |
| "grad_norm": 0.9890980264503261, |
| "learning_rate": 5e-06, |
| "loss": 0.556, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.3559322033898304, |
| "grad_norm": 0.663801200308687, |
| "learning_rate": 5e-06, |
| "loss": 0.5637, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.4463276836158192, |
| "grad_norm": 0.5545663342924204, |
| "learning_rate": 5e-06, |
| "loss": 0.5552, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.536723163841808, |
| "grad_norm": 0.6217937754745557, |
| "learning_rate": 5e-06, |
| "loss": 0.557, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.6271186440677967, |
| "grad_norm": 0.5052808547840478, |
| "learning_rate": 5e-06, |
| "loss": 0.5561, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.7175141242937855, |
| "grad_norm": 0.4796090218442844, |
| "learning_rate": 5e-06, |
| "loss": 0.5583, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.807909604519774, |
| "grad_norm": 0.5000197369268986, |
| "learning_rate": 5e-06, |
| "loss": 0.5572, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.8983050847457628, |
| "grad_norm": 0.5617846633897506, |
| "learning_rate": 5e-06, |
| "loss": 0.5549, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.9887005649717513, |
| "grad_norm": 0.5583576310929224, |
| "learning_rate": 5e-06, |
| "loss": 0.5574, |
| "step": 220 |
| }, |
| { |
| "epoch": 1.9977401129943502, |
| "eval_loss": 0.580794095993042, |
| "eval_runtime": 76.6088, |
| "eval_samples_per_second": 38.912, |
| "eval_steps_per_second": 0.614, |
| "step": 221 |
| }, |
| { |
| "epoch": 2.07909604519774, |
| "grad_norm": 0.7175040219630014, |
| "learning_rate": 5e-06, |
| "loss": 0.5432, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.169491525423729, |
| "grad_norm": 0.5582961983522623, |
| "learning_rate": 5e-06, |
| "loss": 0.5182, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.2598870056497176, |
| "grad_norm": 0.5642582951260645, |
| "learning_rate": 5e-06, |
| "loss": 0.5201, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.3502824858757063, |
| "grad_norm": 0.6344116026906986, |
| "learning_rate": 5e-06, |
| "loss": 0.5186, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.440677966101695, |
| "grad_norm": 0.6118175703856888, |
| "learning_rate": 5e-06, |
| "loss": 0.5193, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.5310734463276834, |
| "grad_norm": 0.9063214098694031, |
| "learning_rate": 5e-06, |
| "loss": 0.5262, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.621468926553672, |
| "grad_norm": 0.7840215083427163, |
| "learning_rate": 5e-06, |
| "loss": 0.5232, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.711864406779661, |
| "grad_norm": 0.720684294573406, |
| "learning_rate": 5e-06, |
| "loss": 0.5192, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.8022598870056497, |
| "grad_norm": 0.5415059736199705, |
| "learning_rate": 5e-06, |
| "loss": 0.526, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.8926553672316384, |
| "grad_norm": 0.4878846503316281, |
| "learning_rate": 5e-06, |
| "loss": 0.5174, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.983050847457627, |
| "grad_norm": 0.5471913311499952, |
| "learning_rate": 5e-06, |
| "loss": 0.5233, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.983050847457627, |
| "eval_loss": 0.5797294974327087, |
| "eval_runtime": 75.4018, |
| "eval_samples_per_second": 39.535, |
| "eval_steps_per_second": 0.623, |
| "step": 330 |
| }, |
| { |
| "epoch": 2.983050847457627, |
| "step": 330, |
| "total_flos": 552552911339520.0, |
| "train_loss": 0.5736125353610877, |
| "train_runtime": 10701.1707, |
| "train_samples_per_second": 15.874, |
| "train_steps_per_second": 0.031 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 330, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 552552911339520.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|