| { |
| "best_global_step": 4000, |
| "best_metric": 0.6931638121604919, |
| "best_model_checkpoint": "/content/drive/MyDrive/PolyGuard/model_final/checkpoint-4000", |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 8000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.025, |
| "grad_norm": 4.840836524963379, |
| "learning_rate": 2.4750000000000002e-05, |
| "loss": 0.6476753997802734, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 3.3573901653289795, |
| "learning_rate": 4.975e-05, |
| "loss": 0.6003963470458984, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.075, |
| "grad_norm": 5.3957953453063965, |
| "learning_rate": 4.96867088607595e-05, |
| "loss": 0.5752050399780273, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 6.348972797393799, |
| "learning_rate": 4.937025316455696e-05, |
| "loss": 0.5668778610229492, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.125, |
| "grad_norm": 3.777247428894043, |
| "learning_rate": 4.905379746835443e-05, |
| "loss": 0.6448370361328125, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 5.974273204803467, |
| "learning_rate": 4.87373417721519e-05, |
| "loss": 0.5914559173583984, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.175, |
| "grad_norm": 5.3988165855407715, |
| "learning_rate": 4.842088607594937e-05, |
| "loss": 0.6363700103759765, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.299058675765991, |
| "learning_rate": 4.810443037974684e-05, |
| "loss": 0.5667419815063477, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.225, |
| "grad_norm": 3.1783978939056396, |
| "learning_rate": 4.7787974683544305e-05, |
| "loss": 0.5284980392456055, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 3.9692907333374023, |
| "learning_rate": 4.747151898734177e-05, |
| "loss": 0.56060546875, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.275, |
| "grad_norm": 2.46657657623291, |
| "learning_rate": 4.715506329113925e-05, |
| "loss": 0.5778974151611328, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 1.4161385297775269, |
| "learning_rate": 4.683860759493671e-05, |
| "loss": 0.5550444412231446, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.325, |
| "grad_norm": 6.127955436706543, |
| "learning_rate": 4.652215189873418e-05, |
| "loss": 0.638050308227539, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 2.9411966800689697, |
| "learning_rate": 4.620569620253164e-05, |
| "loss": 0.5464860153198242, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 5.668337345123291, |
| "learning_rate": 4.588924050632912e-05, |
| "loss": 0.6324460220336914, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 5.304285049438477, |
| "learning_rate": 4.5572784810126585e-05, |
| "loss": 0.5795536041259766, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.425, |
| "grad_norm": 1.6371102333068848, |
| "learning_rate": 4.525632911392405e-05, |
| "loss": 0.5774750137329101, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 1.4340555667877197, |
| "learning_rate": 4.493987341772152e-05, |
| "loss": 0.6004017257690429, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.475, |
| "grad_norm": 2.997642755508423, |
| "learning_rate": 4.462341772151899e-05, |
| "loss": 0.6037093734741211, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 1.124809741973877, |
| "learning_rate": 4.430696202531646e-05, |
| "loss": 0.5478248596191406, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.525, |
| "grad_norm": 1.4012881517410278, |
| "learning_rate": 4.399050632911393e-05, |
| "loss": 0.5746703720092774, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 2.774062395095825, |
| "learning_rate": 4.367405063291139e-05, |
| "loss": 0.5518299865722657, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.575, |
| "grad_norm": 0.9460004568099976, |
| "learning_rate": 4.3357594936708864e-05, |
| "loss": 0.5653076553344727, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 4.564599514007568, |
| "learning_rate": 4.304113924050633e-05, |
| "loss": 0.626568832397461, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 0.9103949666023254, |
| "learning_rate": 4.27246835443038e-05, |
| "loss": 0.5939551544189453, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 3.449150800704956, |
| "learning_rate": 4.2408227848101265e-05, |
| "loss": 0.5971554946899414, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.675, |
| "grad_norm": 3.5549769401550293, |
| "learning_rate": 4.2091772151898736e-05, |
| "loss": 0.5751391983032227, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 1.6542292833328247, |
| "learning_rate": 4.177531645569621e-05, |
| "loss": 0.5986330032348632, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.725, |
| "grad_norm": 1.3074430227279663, |
| "learning_rate": 4.145886075949367e-05, |
| "loss": 0.5717515182495118, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 3.095973014831543, |
| "learning_rate": 4.114240506329114e-05, |
| "loss": 0.5813043594360352, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.775, |
| "grad_norm": 2.611328363418579, |
| "learning_rate": 4.0825949367088615e-05, |
| "loss": 0.6051469039916992, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 2.4239795207977295, |
| "learning_rate": 4.050949367088608e-05, |
| "loss": 0.5933720779418945, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.825, |
| "grad_norm": 1.3653069734573364, |
| "learning_rate": 4.0193037974683544e-05, |
| "loss": 0.5751293182373047, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 1.4362722635269165, |
| "learning_rate": 3.9876582278481015e-05, |
| "loss": 0.5673767852783204, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.875, |
| "grad_norm": 1.1265358924865723, |
| "learning_rate": 3.956012658227849e-05, |
| "loss": 0.6048561477661133, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 1.3712810277938843, |
| "learning_rate": 3.924367088607595e-05, |
| "loss": 0.6653845977783203, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.925, |
| "grad_norm": 1.213412880897522, |
| "learning_rate": 3.8927215189873416e-05, |
| "loss": 0.6943452453613281, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 2.019483804702759, |
| "learning_rate": 3.861075949367089e-05, |
| "loss": 0.6974296569824219, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.975, |
| "grad_norm": 1.273471713066101, |
| "learning_rate": 3.829430379746836e-05, |
| "loss": 0.706220474243164, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 2.3656868934631348, |
| "learning_rate": 3.7977848101265823e-05, |
| "loss": 0.702726058959961, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.6931638121604919, |
| "eval_runtime": 166.9577, |
| "eval_samples_per_second": 47.916, |
| "eval_steps_per_second": 5.99, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.025, |
| "grad_norm": 1.4108144044876099, |
| "learning_rate": 3.7661392405063295e-05, |
| "loss": 0.6999430847167969, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.05, |
| "grad_norm": 2.259119987487793, |
| "learning_rate": 3.734493670886076e-05, |
| "loss": 0.7005876922607421, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.075, |
| "grad_norm": 0.8190616965293884, |
| "learning_rate": 3.702848101265823e-05, |
| "loss": 0.6983786010742188, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.1, |
| "grad_norm": 1.3949053287506104, |
| "learning_rate": 3.67120253164557e-05, |
| "loss": 0.6970309448242188, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.125, |
| "grad_norm": 1.9007196426391602, |
| "learning_rate": 3.639556962025317e-05, |
| "loss": 0.7024803161621094, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.15, |
| "grad_norm": 3.2963850498199463, |
| "learning_rate": 3.607911392405063e-05, |
| "loss": 0.6969153594970703, |
| "step": 4600 |
| }, |
| { |
| "epoch": 1.175, |
| "grad_norm": 2.8197951316833496, |
| "learning_rate": 3.57626582278481e-05, |
| "loss": 0.6913418579101562, |
| "step": 4700 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 3.7260422706604004, |
| "learning_rate": 3.5446202531645574e-05, |
| "loss": 0.6954251098632812, |
| "step": 4800 |
| }, |
| { |
| "epoch": 1.225, |
| "grad_norm": 2.538835048675537, |
| "learning_rate": 3.512974683544304e-05, |
| "loss": 0.7016423797607422, |
| "step": 4900 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 1.9548262357711792, |
| "learning_rate": 3.48132911392405e-05, |
| "loss": 0.6976743316650391, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.275, |
| "grad_norm": 4.0937180519104, |
| "learning_rate": 3.4496835443037975e-05, |
| "loss": 0.700084228515625, |
| "step": 5100 |
| }, |
| { |
| "epoch": 1.3, |
| "grad_norm": 2.521569013595581, |
| "learning_rate": 3.4180379746835446e-05, |
| "loss": 0.6976382446289062, |
| "step": 5200 |
| }, |
| { |
| "epoch": 1.325, |
| "grad_norm": 4.047502517700195, |
| "learning_rate": 3.386392405063291e-05, |
| "loss": 0.6967656707763672, |
| "step": 5300 |
| }, |
| { |
| "epoch": 1.35, |
| "grad_norm": 3.4342639446258545, |
| "learning_rate": 3.354746835443038e-05, |
| "loss": 0.6955546569824219, |
| "step": 5400 |
| }, |
| { |
| "epoch": 1.375, |
| "grad_norm": 1.5393496751785278, |
| "learning_rate": 3.3231012658227854e-05, |
| "loss": 0.6965518951416015, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 4.321380138397217, |
| "learning_rate": 3.291455696202532e-05, |
| "loss": 0.6958496856689453, |
| "step": 5600 |
| }, |
| { |
| "epoch": 1.425, |
| "grad_norm": 1.1191785335540771, |
| "learning_rate": 3.259810126582279e-05, |
| "loss": 0.6999098968505859, |
| "step": 5700 |
| }, |
| { |
| "epoch": 1.45, |
| "grad_norm": 0.8614036440849304, |
| "learning_rate": 3.2281645569620254e-05, |
| "loss": 0.6959712219238281, |
| "step": 5800 |
| }, |
| { |
| "epoch": 1.475, |
| "grad_norm": 0.9664958715438843, |
| "learning_rate": 3.1965189873417725e-05, |
| "loss": 0.6925695037841797, |
| "step": 5900 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 2.1453211307525635, |
| "learning_rate": 3.164873417721519e-05, |
| "loss": 0.698175048828125, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.525, |
| "grad_norm": 2.1440930366516113, |
| "learning_rate": 3.133227848101266e-05, |
| "loss": 0.682925796508789, |
| "step": 6100 |
| }, |
| { |
| "epoch": 1.55, |
| "grad_norm": 0.8990124464035034, |
| "learning_rate": 3.1015822784810126e-05, |
| "loss": 0.6517456817626953, |
| "step": 6200 |
| }, |
| { |
| "epoch": 1.575, |
| "grad_norm": 1.3929342031478882, |
| "learning_rate": 3.06993670886076e-05, |
| "loss": 0.7006549835205078, |
| "step": 6300 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 2.1912875175476074, |
| "learning_rate": 3.0382911392405065e-05, |
| "loss": 0.6975661468505859, |
| "step": 6400 |
| }, |
| { |
| "epoch": 1.625, |
| "grad_norm": 1.1694247722625732, |
| "learning_rate": 3.0066455696202533e-05, |
| "loss": 0.6955360412597656, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.65, |
| "grad_norm": 3.340589761734009, |
| "learning_rate": 2.975e-05, |
| "loss": 0.7008393859863281, |
| "step": 6600 |
| }, |
| { |
| "epoch": 1.675, |
| "grad_norm": 2.42465877532959, |
| "learning_rate": 2.9433544303797473e-05, |
| "loss": 0.6998232269287109, |
| "step": 6700 |
| }, |
| { |
| "epoch": 1.7, |
| "grad_norm": 1.9139105081558228, |
| "learning_rate": 2.9117088607594937e-05, |
| "loss": 0.6989698028564453, |
| "step": 6800 |
| }, |
| { |
| "epoch": 1.725, |
| "grad_norm": 0.7264005541801453, |
| "learning_rate": 2.8800632911392405e-05, |
| "loss": 0.6944959259033203, |
| "step": 6900 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 1.1322827339172363, |
| "learning_rate": 2.8484177215189873e-05, |
| "loss": 0.7098442840576172, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.775, |
| "grad_norm": 2.150141477584839, |
| "learning_rate": 2.8167721518987345e-05, |
| "loss": 0.6946941375732422, |
| "step": 7100 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 1.850095510482788, |
| "learning_rate": 2.785126582278481e-05, |
| "loss": 0.6941130065917969, |
| "step": 7200 |
| }, |
| { |
| "epoch": 1.825, |
| "grad_norm": 1.994320273399353, |
| "learning_rate": 2.7534810126582277e-05, |
| "loss": 0.6933687591552734, |
| "step": 7300 |
| }, |
| { |
| "epoch": 1.85, |
| "grad_norm": 1.1272798776626587, |
| "learning_rate": 2.721835443037975e-05, |
| "loss": 0.7010916900634766, |
| "step": 7400 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 2.2662463188171387, |
| "learning_rate": 2.6901898734177217e-05, |
| "loss": 0.6951226806640625, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.9, |
| "grad_norm": 2.8019468784332275, |
| "learning_rate": 2.6585443037974685e-05, |
| "loss": 0.6966998291015625, |
| "step": 7600 |
| }, |
| { |
| "epoch": 1.925, |
| "grad_norm": 2.3949637413024902, |
| "learning_rate": 2.6268987341772156e-05, |
| "loss": 0.6956380462646484, |
| "step": 7700 |
| }, |
| { |
| "epoch": 1.95, |
| "grad_norm": 2.5100715160369873, |
| "learning_rate": 2.595253164556962e-05, |
| "loss": 0.6968719482421875, |
| "step": 7800 |
| }, |
| { |
| "epoch": 1.975, |
| "grad_norm": 6.460758209228516, |
| "learning_rate": 2.563607594936709e-05, |
| "loss": 0.692848892211914, |
| "step": 7900 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 3.2004637718200684, |
| "learning_rate": 2.5319620253164557e-05, |
| "loss": 0.6972612762451171, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.6932027339935303, |
| "eval_runtime": 166.3088, |
| "eval_samples_per_second": 48.103, |
| "eval_steps_per_second": 6.013, |
| "step": 8000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 16000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8419553771520000.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|