{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 367,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.027285129604365622,
      "grad_norm": 2.3641693958998617,
      "learning_rate": 2.432432432432433e-06,
      "loss": 0.2044,
      "step": 10
    },
    {
      "epoch": 0.054570259208731244,
      "grad_norm": 1.4337953155689682,
      "learning_rate": 5.135135135135135e-06,
      "loss": 0.1107,
      "step": 20
    },
    {
      "epoch": 0.08185538881309687,
      "grad_norm": 0.76068821729334,
      "learning_rate": 7.837837837837838e-06,
      "loss": 0.0463,
      "step": 30
    },
    {
      "epoch": 0.10914051841746249,
      "grad_norm": 0.6475659229874231,
      "learning_rate": 9.99909372761763e-06,
      "loss": 0.0478,
      "step": 40
    },
    {
      "epoch": 0.1364256480218281,
      "grad_norm": 0.9686226673295716,
      "learning_rate": 9.96740867674275e-06,
      "loss": 0.0585,
      "step": 50
    },
    {
      "epoch": 0.16371077762619374,
      "grad_norm": 0.6271034418616805,
      "learning_rate": 9.890738003669029e-06,
      "loss": 0.0495,
      "step": 60
    },
    {
      "epoch": 0.19099590723055934,
      "grad_norm": 0.8008519457996116,
      "learning_rate": 9.769776049884564e-06,
      "loss": 0.0441,
      "step": 70
    },
    {
      "epoch": 0.21828103683492497,
      "grad_norm": 0.8781262839923529,
      "learning_rate": 9.60561826557425e-06,
      "loss": 0.0494,
      "step": 80
    },
    {
      "epoch": 0.24556616643929058,
      "grad_norm": 0.705533350341626,
      "learning_rate": 9.399751289053267e-06,
      "loss": 0.0342,
      "step": 90
    },
    {
      "epoch": 0.2728512960436562,
      "grad_norm": 0.8249561019911485,
      "learning_rate": 9.154039483540273e-06,
      "loss": 0.0465,
      "step": 100
    },
    {
      "epoch": 0.30013642564802184,
      "grad_norm": 0.7887431237474309,
      "learning_rate": 8.870708053195414e-06,
      "loss": 0.0375,
      "step": 110
    },
    {
      "epoch": 0.3274215552523875,
      "grad_norm": 0.339672156088185,
      "learning_rate": 8.552322891326846e-06,
      "loss": 0.0316,
      "step": 120
    },
    {
      "epoch": 0.35470668485675305,
      "grad_norm": 0.6938753582388089,
      "learning_rate": 8.201767343263612e-06,
      "loss": 0.0414,
      "step": 130
    },
    {
      "epoch": 0.3819918144611187,
      "grad_norm": 0.9113487394533759,
      "learning_rate": 7.822216094333847e-06,
      "loss": 0.0427,
      "step": 140
    },
    {
      "epoch": 0.4092769440654843,
      "grad_norm": 0.9002304401720267,
      "learning_rate": 7.4171064194228196e-06,
      "loss": 0.0508,
      "step": 150
    },
    {
      "epoch": 0.43656207366984995,
      "grad_norm": 0.46493723524279185,
      "learning_rate": 6.990107054479313e-06,
      "loss": 0.0353,
      "step": 160
    },
    {
      "epoch": 0.4638472032742155,
      "grad_norm": 0.5562245099793953,
      "learning_rate": 6.545084971874738e-06,
      "loss": 0.0509,
      "step": 170
    },
    {
      "epoch": 0.49113233287858116,
      "grad_norm": 0.7505136690416153,
      "learning_rate": 6.08607036050254e-06,
      "loss": 0.046,
      "step": 180
    },
    {
      "epoch": 0.5184174624829468,
      "grad_norm": 0.5036723905882816,
      "learning_rate": 5.617220127763474e-06,
      "loss": 0.0426,
      "step": 190
    },
    {
      "epoch": 0.5457025920873124,
      "grad_norm": 0.5892958090446017,
      "learning_rate": 5.142780253968481e-06,
      "loss": 0.0382,
      "step": 200
    },
    {
      "epoch": 0.572987721691678,
      "grad_norm": 0.43754682825635627,
      "learning_rate": 4.667047340083481e-06,
      "loss": 0.031,
      "step": 210
    },
    {
      "epoch": 0.6002728512960437,
      "grad_norm": 0.611056086304531,
      "learning_rate": 4.194329697045681e-06,
      "loss": 0.0372,
      "step": 220
    },
    {
      "epoch": 0.6275579809004093,
      "grad_norm": 0.4841019826566699,
      "learning_rate": 3.7289083290325668e-06,
      "loss": 0.0361,
      "step": 230
    },
    {
      "epoch": 0.654843110504775,
      "grad_norm": 0.44138946851724803,
      "learning_rate": 3.274998164025148e-06,
      "loss": 0.0335,
      "step": 240
    },
    {
      "epoch": 0.6821282401091405,
      "grad_norm": 0.6831939247936597,
      "learning_rate": 2.8367098827674575e-06,
      "loss": 0.0346,
      "step": 250
    },
    {
      "epoch": 0.7094133697135061,
      "grad_norm": 0.9062857258621547,
      "learning_rate": 2.418012691805191e-06,
      "loss": 0.0321,
      "step": 260
    },
    {
      "epoch": 0.7366984993178718,
      "grad_norm": 0.546275501732938,
      "learning_rate": 2.0226983777365604e-06,
      "loss": 0.0331,
      "step": 270
    },
    {
      "epoch": 0.7639836289222374,
      "grad_norm": 0.3539679256575418,
      "learning_rate": 1.6543469682057105e-06,
      "loss": 0.0357,
      "step": 280
    },
    {
      "epoch": 0.791268758526603,
      "grad_norm": 0.5908726655712924,
      "learning_rate": 1.3162943106179748e-06,
      "loss": 0.0333,
      "step": 290
    },
    {
      "epoch": 0.8185538881309686,
      "grad_norm": 0.5732288853145052,
      "learning_rate": 1.0116018621892237e-06,
      "loss": 0.0396,
      "step": 300
    },
    {
      "epoch": 0.8458390177353342,
      "grad_norm": 0.5804321672176804,
      "learning_rate": 7.430289649152156e-07,
      "loss": 0.0292,
      "step": 310
    },
    {
      "epoch": 0.8731241473396999,
      "grad_norm": 0.4784774157551655,
      "learning_rate": 5.130078565432089e-07,
      "loss": 0.0276,
      "step": 320
    },
    {
      "epoch": 0.9004092769440655,
      "grad_norm": 0.4332276557892971,
      "learning_rate": 3.2362164385026704e-07,
      "loss": 0.0378,
      "step": 330
    },
    {
      "epoch": 0.927694406548431,
      "grad_norm": 0.8116661660491021,
      "learning_rate": 1.765854377057219e-07,
      "loss": 0.0261,
      "step": 340
    },
    {
      "epoch": 0.9549795361527967,
      "grad_norm": 0.7533605649573977,
      "learning_rate": 7.32308207615351e-08,
      "loss": 0.0333,
      "step": 350
    },
    {
      "epoch": 0.9822646657571623,
      "grad_norm": 0.518345083092868,
      "learning_rate": 1.449378843361271e-08,
      "loss": 0.0308,
      "step": 360
    }
  ],
  "logging_steps": 10,
  "max_steps": 367,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 11191955800064.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}