Video-Text-to-Text
Transformers
Safetensors
qwen2_vl_bev
text-generation
llama-factory
full
Generated from Trainer
spatial-intelligence
3d-vision
Instructions to use Spacewanderer8263/Proxy3D-8B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Spacewanderer8263/Proxy3D-8B with Transformers:
# Load model directly
from transformers import AutoModelForSeq2SeqLM

model = AutoModelForSeq2SeqLM.from_pretrained("Spacewanderer8263/Proxy3D-8B", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 2486, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0004023335345001006, | |
| "grad_norm": 27.82010551409744, | |
| "learning_rate": 0.0, | |
| "loss": 1.7886, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0008046670690002012, | |
| "grad_norm": 19.872529474831897, | |
| "learning_rate": 2.008032128514056e-08, | |
| "loss": 1.8136, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0012070006035003018, | |
| "grad_norm": 24.180123303383308, | |
| "learning_rate": 4.016064257028112e-08, | |
| "loss": 1.9299, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0016093341380004024, | |
| "grad_norm": 21.489391381684396, | |
| "learning_rate": 6.02409638554217e-08, | |
| "loss": 1.92, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0020116676725005027, | |
| "grad_norm": 17.558745295130834, | |
| "learning_rate": 8.032128514056224e-08, | |
| "loss": 1.8227, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0024140012070006035, | |
| "grad_norm": 21.46699607887353, | |
| "learning_rate": 1.0040160642570281e-07, | |
| "loss": 1.8824, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.002816334741500704, | |
| "grad_norm": 21.298475273313784, | |
| "learning_rate": 1.204819277108434e-07, | |
| "loss": 1.8179, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0032186682760008047, | |
| "grad_norm": 21.372007205869878, | |
| "learning_rate": 1.4056224899598394e-07, | |
| "loss": 1.6859, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.003621001810500905, | |
| "grad_norm": 23.292357598058903, | |
| "learning_rate": 1.6064257028112448e-07, | |
| "loss": 2.0856, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0040233353450010055, | |
| "grad_norm": 23.233045584039576, | |
| "learning_rate": 1.8072289156626505e-07, | |
| "loss": 2.1395, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.004425668879501106, | |
| "grad_norm": 19.4337570491022, | |
| "learning_rate": 2.0080321285140563e-07, | |
| "loss": 1.8688, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.004828002414001207, | |
| "grad_norm": 24.82725637763247, | |
| "learning_rate": 2.208835341365462e-07, | |
| "loss": 1.8054, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.005230335948501308, | |
| "grad_norm": 27.369177322875704, | |
| "learning_rate": 2.409638554216868e-07, | |
| "loss": 1.8794, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.005632669483001408, | |
| "grad_norm": 20.854134960964704, | |
| "learning_rate": 2.610441767068273e-07, | |
| "loss": 1.8873, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.006035003017501509, | |
| "grad_norm": 25.685971689153487, | |
| "learning_rate": 2.811244979919679e-07, | |
| "loss": 2.0743, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0064373365520016095, | |
| "grad_norm": 21.501547984115398, | |
| "learning_rate": 3.0120481927710845e-07, | |
| "loss": 1.8415, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.00683967008650171, | |
| "grad_norm": 22.75991867094887, | |
| "learning_rate": 3.2128514056224897e-07, | |
| "loss": 1.9089, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.00724200362100181, | |
| "grad_norm": 22.23601729043514, | |
| "learning_rate": 3.413654618473896e-07, | |
| "loss": 1.9641, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.007644337155501911, | |
| "grad_norm": 26.35207563502954, | |
| "learning_rate": 3.614457831325301e-07, | |
| "loss": 2.0458, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.008046670690002011, | |
| "grad_norm": 20.57786492934256, | |
| "learning_rate": 3.8152610441767073e-07, | |
| "loss": 1.7877, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.008449004224502113, | |
| "grad_norm": 25.563694935501676, | |
| "learning_rate": 4.0160642570281125e-07, | |
| "loss": 2.1414, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.008851337759002213, | |
| "grad_norm": 18.98280746175437, | |
| "learning_rate": 4.216867469879518e-07, | |
| "loss": 1.7995, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.009253671293502314, | |
| "grad_norm": 22.278384224532818, | |
| "learning_rate": 4.417670682730924e-07, | |
| "loss": 2.0412, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.009656004828002414, | |
| "grad_norm": 18.256806226009903, | |
| "learning_rate": 4.6184738955823296e-07, | |
| "loss": 1.7538, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.010058338362502514, | |
| "grad_norm": 19.087838303092653, | |
| "learning_rate": 4.819277108433736e-07, | |
| "loss": 1.708, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.010460671897002616, | |
| "grad_norm": 18.961034656693933, | |
| "learning_rate": 5.020080321285141e-07, | |
| "loss": 1.6661, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.010863005431502716, | |
| "grad_norm": 18.11814523029599, | |
| "learning_rate": 5.220883534136546e-07, | |
| "loss": 1.753, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.011265338966002816, | |
| "grad_norm": 19.63691340845534, | |
| "learning_rate": 5.421686746987952e-07, | |
| "loss": 1.7005, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.011667672500502917, | |
| "grad_norm": 16.293048438980087, | |
| "learning_rate": 5.622489959839358e-07, | |
| "loss": 1.7269, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.012070006035003017, | |
| "grad_norm": 15.86827584036101, | |
| "learning_rate": 5.823293172690764e-07, | |
| "loss": 1.5401, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.012472339569503117, | |
| "grad_norm": 17.38302283506762, | |
| "learning_rate": 6.024096385542169e-07, | |
| "loss": 1.5943, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.012874673104003219, | |
| "grad_norm": 15.374566189246599, | |
| "learning_rate": 6.224899598393574e-07, | |
| "loss": 1.6221, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.013277006638503319, | |
| "grad_norm": 14.904500940732934, | |
| "learning_rate": 6.425702811244979e-07, | |
| "loss": 1.5452, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.01367934017300342, | |
| "grad_norm": 16.29526851000047, | |
| "learning_rate": 6.626506024096387e-07, | |
| "loss": 1.2262, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.01408167370750352, | |
| "grad_norm": 12.819998198911712, | |
| "learning_rate": 6.827309236947792e-07, | |
| "loss": 1.3476, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.01448400724200362, | |
| "grad_norm": 13.25182925491938, | |
| "learning_rate": 7.028112449799197e-07, | |
| "loss": 1.4371, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.014886340776503722, | |
| "grad_norm": 14.477871111934059, | |
| "learning_rate": 7.228915662650602e-07, | |
| "loss": 1.3476, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.015288674311003822, | |
| "grad_norm": 14.6196272151239, | |
| "learning_rate": 7.429718875502008e-07, | |
| "loss": 1.5309, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.015691007845503924, | |
| "grad_norm": 11.90353169885529, | |
| "learning_rate": 7.630522088353415e-07, | |
| "loss": 1.3026, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.016093341380004022, | |
| "grad_norm": 13.271008324770724, | |
| "learning_rate": 7.83132530120482e-07, | |
| "loss": 1.4655, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.016495674914504124, | |
| "grad_norm": 11.2349199407351, | |
| "learning_rate": 8.032128514056225e-07, | |
| "loss": 1.2782, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.016898008449004225, | |
| "grad_norm": 9.663111453936613, | |
| "learning_rate": 8.232931726907631e-07, | |
| "loss": 1.0628, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.017300341983504323, | |
| "grad_norm": 10.290302552273262, | |
| "learning_rate": 8.433734939759036e-07, | |
| "loss": 1.2398, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.017702675518004425, | |
| "grad_norm": 9.437964794637868, | |
| "learning_rate": 8.634538152610443e-07, | |
| "loss": 1.1087, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.018105009052504527, | |
| "grad_norm": 9.928972956598479, | |
| "learning_rate": 8.835341365461848e-07, | |
| "loss": 1.1231, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.01850734258700463, | |
| "grad_norm": 7.193942700746645, | |
| "learning_rate": 9.036144578313254e-07, | |
| "loss": 0.9053, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.018909676121504727, | |
| "grad_norm": 8.545887733782381, | |
| "learning_rate": 9.236947791164659e-07, | |
| "loss": 1.0188, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.01931200965600483, | |
| "grad_norm": 9.964310833592453, | |
| "learning_rate": 9.437751004016064e-07, | |
| "loss": 1.2336, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.01971434319050493, | |
| "grad_norm": 8.465102281501581, | |
| "learning_rate": 9.638554216867472e-07, | |
| "loss": 0.978, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.020116676725005028, | |
| "grad_norm": 8.387761909932218, | |
| "learning_rate": 9.839357429718876e-07, | |
| "loss": 1.0664, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02051901025950513, | |
| "grad_norm": 8.853387211734997, | |
| "learning_rate": 1.0040160642570282e-06, | |
| "loss": 1.1316, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.02092134379400523, | |
| "grad_norm": 8.36811488486395, | |
| "learning_rate": 1.0240963855421688e-06, | |
| "loss": 0.9175, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.02132367732850533, | |
| "grad_norm": 8.394630361238342, | |
| "learning_rate": 1.0441767068273092e-06, | |
| "loss": 0.9371, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.02172601086300543, | |
| "grad_norm": 7.411500707300452, | |
| "learning_rate": 1.0642570281124499e-06, | |
| "loss": 0.9567, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.022128344397505533, | |
| "grad_norm": 8.194667792436356, | |
| "learning_rate": 1.0843373493975905e-06, | |
| "loss": 0.9994, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.02253067793200563, | |
| "grad_norm": 7.141458407534788, | |
| "learning_rate": 1.1044176706827311e-06, | |
| "loss": 0.8714, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.022933011466505733, | |
| "grad_norm": 5.765708921674402, | |
| "learning_rate": 1.1244979919678715e-06, | |
| "loss": 0.9238, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.023335345001005835, | |
| "grad_norm": 7.924253998059311, | |
| "learning_rate": 1.1445783132530121e-06, | |
| "loss": 0.9884, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.023737678535505933, | |
| "grad_norm": 6.597672306780267, | |
| "learning_rate": 1.1646586345381528e-06, | |
| "loss": 0.9904, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.024140012070006035, | |
| "grad_norm": 6.937688487718752, | |
| "learning_rate": 1.1847389558232934e-06, | |
| "loss": 0.971, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.024542345604506136, | |
| "grad_norm": 6.148469030855221, | |
| "learning_rate": 1.2048192771084338e-06, | |
| "loss": 0.7868, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.024944679139006234, | |
| "grad_norm": 6.388105517333996, | |
| "learning_rate": 1.2248995983935744e-06, | |
| "loss": 0.8567, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.025347012673506336, | |
| "grad_norm": 6.105324559387419, | |
| "learning_rate": 1.2449799196787148e-06, | |
| "loss": 0.9989, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.025749346208006438, | |
| "grad_norm": 6.565241933640869, | |
| "learning_rate": 1.2650602409638555e-06, | |
| "loss": 0.8641, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.02615167974250654, | |
| "grad_norm": 5.869907162685047, | |
| "learning_rate": 1.2851405622489959e-06, | |
| "loss": 0.9526, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.026554013277006638, | |
| "grad_norm": 5.851371649159624, | |
| "learning_rate": 1.3052208835341367e-06, | |
| "loss": 0.8387, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.02695634681150674, | |
| "grad_norm": 6.3493989766408765, | |
| "learning_rate": 1.3253012048192773e-06, | |
| "loss": 1.0458, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.02735868034600684, | |
| "grad_norm": 5.9283912382403114, | |
| "learning_rate": 1.345381526104418e-06, | |
| "loss": 1.0158, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.02776101388050694, | |
| "grad_norm": 6.184126721660312, | |
| "learning_rate": 1.3654618473895584e-06, | |
| "loss": 0.8218, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.02816334741500704, | |
| "grad_norm": 6.0169493477739975, | |
| "learning_rate": 1.385542168674699e-06, | |
| "loss": 0.7966, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.028565680949507143, | |
| "grad_norm": 6.084096016864222, | |
| "learning_rate": 1.4056224899598394e-06, | |
| "loss": 0.8221, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.02896801448400724, | |
| "grad_norm": 6.441697966234361, | |
| "learning_rate": 1.42570281124498e-06, | |
| "loss": 0.7189, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.029370348018507342, | |
| "grad_norm": 5.684758471405106, | |
| "learning_rate": 1.4457831325301204e-06, | |
| "loss": 0.811, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.029772681553007444, | |
| "grad_norm": 6.066554340342954, | |
| "learning_rate": 1.465863453815261e-06, | |
| "loss": 0.8128, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.030175015087507542, | |
| "grad_norm": 5.398586406654046, | |
| "learning_rate": 1.4859437751004017e-06, | |
| "loss": 0.8521, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.030577348622007644, | |
| "grad_norm": 5.72164636854203, | |
| "learning_rate": 1.5060240963855425e-06, | |
| "loss": 0.8733, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.030979682156507746, | |
| "grad_norm": 5.317662321058184, | |
| "learning_rate": 1.526104417670683e-06, | |
| "loss": 0.6758, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.03138201569100785, | |
| "grad_norm": 5.008046012529329, | |
| "learning_rate": 1.5461847389558236e-06, | |
| "loss": 0.77, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.03178434922550795, | |
| "grad_norm": 5.806938383916589, | |
| "learning_rate": 1.566265060240964e-06, | |
| "loss": 0.9218, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.032186682760008044, | |
| "grad_norm": 5.687427641764964, | |
| "learning_rate": 1.5863453815261046e-06, | |
| "loss": 0.873, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.032589016294508145, | |
| "grad_norm": 5.877177229539529, | |
| "learning_rate": 1.606425702811245e-06, | |
| "loss": 0.8347, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.03299134982900825, | |
| "grad_norm": 5.661277204998088, | |
| "learning_rate": 1.6265060240963856e-06, | |
| "loss": 0.7537, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.03339368336350835, | |
| "grad_norm": 6.0004706518847675, | |
| "learning_rate": 1.6465863453815263e-06, | |
| "loss": 0.9341, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.03379601689800845, | |
| "grad_norm": 5.383372715468371, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.716, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.03419835043250855, | |
| "grad_norm": 5.870492937673813, | |
| "learning_rate": 1.6867469879518073e-06, | |
| "loss": 0.8515, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.03460068396700865, | |
| "grad_norm": 5.40282247472889, | |
| "learning_rate": 1.7068273092369481e-06, | |
| "loss": 0.7575, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.03500301750150875, | |
| "grad_norm": 5.244298542730251, | |
| "learning_rate": 1.7269076305220885e-06, | |
| "loss": 0.7342, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.03540535103600885, | |
| "grad_norm": 5.118324863903489, | |
| "learning_rate": 1.7469879518072292e-06, | |
| "loss": 0.7633, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.03580768457050895, | |
| "grad_norm": 5.559590746485852, | |
| "learning_rate": 1.7670682730923696e-06, | |
| "loss": 0.915, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.036210018105009054, | |
| "grad_norm": 5.093580932305843, | |
| "learning_rate": 1.7871485943775102e-06, | |
| "loss": 0.7234, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.036612351639509155, | |
| "grad_norm": 5.949076037097858, | |
| "learning_rate": 1.8072289156626508e-06, | |
| "loss": 0.9392, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.03701468517400926, | |
| "grad_norm": 6.126962098905458, | |
| "learning_rate": 1.8273092369477912e-06, | |
| "loss": 0.7973, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.03741701870850935, | |
| "grad_norm": 5.418925049178253, | |
| "learning_rate": 1.8473895582329318e-06, | |
| "loss": 0.7386, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.03781935224300945, | |
| "grad_norm": 5.30622581323819, | |
| "learning_rate": 1.8674698795180723e-06, | |
| "loss": 0.7712, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.038221685777509555, | |
| "grad_norm": 5.203905538553713, | |
| "learning_rate": 1.8875502008032129e-06, | |
| "loss": 0.7403, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.03862401931200966, | |
| "grad_norm": 5.497752744670633, | |
| "learning_rate": 1.9076305220883537e-06, | |
| "loss": 0.8343, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.03902635284650976, | |
| "grad_norm": 5.792666990034754, | |
| "learning_rate": 1.9277108433734943e-06, | |
| "loss": 0.9855, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.03942868638100986, | |
| "grad_norm": 4.95103734609948, | |
| "learning_rate": 1.947791164658635e-06, | |
| "loss": 0.7676, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.039831019915509955, | |
| "grad_norm": 5.313239350915902, | |
| "learning_rate": 1.967871485943775e-06, | |
| "loss": 0.8112, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.040233353450010056, | |
| "grad_norm": 5.262581677945773, | |
| "learning_rate": 1.987951807228916e-06, | |
| "loss": 0.7726, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04063568698451016, | |
| "grad_norm": 5.7284063119812965, | |
| "learning_rate": 2.0080321285140564e-06, | |
| "loss": 0.7143, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.04103802051901026, | |
| "grad_norm": 5.61074467493987, | |
| "learning_rate": 2.028112449799197e-06, | |
| "loss": 0.7821, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.04144035405351036, | |
| "grad_norm": 5.668818940572837, | |
| "learning_rate": 2.0481927710843377e-06, | |
| "loss": 0.6217, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.04184268758801046, | |
| "grad_norm": 6.102827563222031, | |
| "learning_rate": 2.068273092369478e-06, | |
| "loss": 0.7094, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.04224502112251056, | |
| "grad_norm": 5.106901986680839, | |
| "learning_rate": 2.0883534136546185e-06, | |
| "loss": 0.7452, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.04264735465701066, | |
| "grad_norm": 5.320689458556449, | |
| "learning_rate": 2.1084337349397595e-06, | |
| "loss": 0.8121, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.04304968819151076, | |
| "grad_norm": 4.851575241509713, | |
| "learning_rate": 2.1285140562248997e-06, | |
| "loss": 0.7695, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.04345202172601086, | |
| "grad_norm": 5.7294050539399315, | |
| "learning_rate": 2.1485943775100404e-06, | |
| "loss": 0.8343, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.043854355260510965, | |
| "grad_norm": 6.2102432153860265, | |
| "learning_rate": 2.168674698795181e-06, | |
| "loss": 0.8478, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.044256688795011066, | |
| "grad_norm": 5.8055227032618415, | |
| "learning_rate": 2.1887550200803216e-06, | |
| "loss": 0.7647, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04465902232951117, | |
| "grad_norm": 5.59165747057277, | |
| "learning_rate": 2.2088353413654622e-06, | |
| "loss": 0.6905, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.04506135586401126, | |
| "grad_norm": 4.682897930842293, | |
| "learning_rate": 2.2289156626506024e-06, | |
| "loss": 0.6875, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.045463689398511364, | |
| "grad_norm": 4.320466219380915, | |
| "learning_rate": 2.248995983935743e-06, | |
| "loss": 0.696, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.045866022933011466, | |
| "grad_norm": 5.050663115560423, | |
| "learning_rate": 2.2690763052208837e-06, | |
| "loss": 0.6224, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.04626835646751157, | |
| "grad_norm": 4.681780822596801, | |
| "learning_rate": 2.2891566265060243e-06, | |
| "loss": 0.6462, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.04667069000201167, | |
| "grad_norm": 5.954135299109462, | |
| "learning_rate": 2.309236947791165e-06, | |
| "loss": 0.8848, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.04707302353651177, | |
| "grad_norm": 5.573998592787985, | |
| "learning_rate": 2.3293172690763055e-06, | |
| "loss": 0.8088, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.047475357071011866, | |
| "grad_norm": 5.070007694615057, | |
| "learning_rate": 2.349397590361446e-06, | |
| "loss": 0.7173, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.04787769060551197, | |
| "grad_norm": 5.671554086319649, | |
| "learning_rate": 2.3694779116465868e-06, | |
| "loss": 0.8136, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.04828002414001207, | |
| "grad_norm": 5.769697685018851, | |
| "learning_rate": 2.389558232931727e-06, | |
| "loss": 0.7733, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04868235767451217, | |
| "grad_norm": 4.920298621449247, | |
| "learning_rate": 2.4096385542168676e-06, | |
| "loss": 0.5931, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.04908469120901227, | |
| "grad_norm": 5.49690667652572, | |
| "learning_rate": 2.4297188755020082e-06, | |
| "loss": 0.8079, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.049487024743512374, | |
| "grad_norm": 5.369187249008034, | |
| "learning_rate": 2.449799196787149e-06, | |
| "loss": 0.726, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.04988935827801247, | |
| "grad_norm": 4.8515152911462796, | |
| "learning_rate": 2.469879518072289e-06, | |
| "loss": 0.7691, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.05029169181251257, | |
| "grad_norm": 5.099204794104859, | |
| "learning_rate": 2.4899598393574297e-06, | |
| "loss": 0.8361, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.05069402534701267, | |
| "grad_norm": 5.390609640239935, | |
| "learning_rate": 2.5100401606425707e-06, | |
| "loss": 0.6527, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.051096358881512774, | |
| "grad_norm": 5.053936099287885, | |
| "learning_rate": 2.530120481927711e-06, | |
| "loss": 0.7326, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.051498692416012876, | |
| "grad_norm": 5.876719841742455, | |
| "learning_rate": 2.5502008032128516e-06, | |
| "loss": 0.8571, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.05190102595051298, | |
| "grad_norm": 5.291120710287588, | |
| "learning_rate": 2.5702811244979918e-06, | |
| "loss": 0.7641, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.05230335948501308, | |
| "grad_norm": 5.2648612797895336, | |
| "learning_rate": 2.590361445783133e-06, | |
| "loss": 0.768, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.052705693019513174, | |
| "grad_norm": 5.146667666976231, | |
| "learning_rate": 2.6104417670682734e-06, | |
| "loss": 0.729, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.053108026554013275, | |
| "grad_norm": 5.607251668752912, | |
| "learning_rate": 2.6305220883534136e-06, | |
| "loss": 0.7812, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.05351036008851338, | |
| "grad_norm": 4.774953294338218, | |
| "learning_rate": 2.6506024096385547e-06, | |
| "loss": 0.6996, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.05391269362301348, | |
| "grad_norm": 5.187652607031923, | |
| "learning_rate": 2.670682730923695e-06, | |
| "loss": 0.7029, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.05431502715751358, | |
| "grad_norm": 5.631076339839678, | |
| "learning_rate": 2.690763052208836e-06, | |
| "loss": 0.7618, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.05471736069201368, | |
| "grad_norm": 5.1080370644891735, | |
| "learning_rate": 2.710843373493976e-06, | |
| "loss": 0.6746, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.05511969422651378, | |
| "grad_norm": 5.177774314908037, | |
| "learning_rate": 2.7309236947791167e-06, | |
| "loss": 0.6576, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.05552202776101388, | |
| "grad_norm": 5.4428731816372355, | |
| "learning_rate": 2.751004016064257e-06, | |
| "loss": 0.8666, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.05592436129551398, | |
| "grad_norm": 4.737577892176393, | |
| "learning_rate": 2.771084337349398e-06, | |
| "loss": 0.6303, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.05632669483001408, | |
| "grad_norm": 5.958074686629733, | |
| "learning_rate": 2.791164658634538e-06, | |
| "loss": 0.7375, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.056729028364514184, | |
| "grad_norm": 5.347638262573924, | |
| "learning_rate": 2.811244979919679e-06, | |
| "loss": 0.7496, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.057131361899014285, | |
| "grad_norm": 5.25633214854894, | |
| "learning_rate": 2.83132530120482e-06, | |
| "loss": 0.7703, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.05753369543351439, | |
| "grad_norm": 6.3493191578507355, | |
| "learning_rate": 2.85140562248996e-06, | |
| "loss": 0.7297, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.05793602896801448, | |
| "grad_norm": 5.291536986822124, | |
| "learning_rate": 2.8714859437751007e-06, | |
| "loss": 0.7848, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.05833836250251458, | |
| "grad_norm": 4.327776932092466, | |
| "learning_rate": 2.891566265060241e-06, | |
| "loss": 0.5537, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.058740696037014685, | |
| "grad_norm": 4.827406966450388, | |
| "learning_rate": 2.911646586345382e-06, | |
| "loss": 0.6614, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.05914302957151479, | |
| "grad_norm": 5.4512595989034605, | |
| "learning_rate": 2.931726907630522e-06, | |
| "loss": 0.5105, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.05954536310601489, | |
| "grad_norm": 5.349806231832712, | |
| "learning_rate": 2.9518072289156627e-06, | |
| "loss": 0.7398, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.05994769664051499, | |
| "grad_norm": 5.376464953972205, | |
| "learning_rate": 2.9718875502008034e-06, | |
| "loss": 0.832, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.060350030175015085, | |
| "grad_norm": 4.4973784287811, | |
| "learning_rate": 2.991967871485944e-06, | |
| "loss": 0.6631, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.060752363709515186, | |
| "grad_norm": 4.1574904576313925, | |
| "learning_rate": 3.012048192771085e-06, | |
| "loss": 0.5736, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.06115469724401529, | |
| "grad_norm": 4.932454920014607, | |
| "learning_rate": 3.0321285140562252e-06, | |
| "loss": 0.6843, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.06155703077851539, | |
| "grad_norm": 4.920072821237323, | |
| "learning_rate": 3.052208835341366e-06, | |
| "loss": 0.6115, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.06195936431301549, | |
| "grad_norm": 5.069151288790082, | |
| "learning_rate": 3.072289156626506e-06, | |
| "loss": 0.6592, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.06236169784751559, | |
| "grad_norm": 4.940734045454529, | |
| "learning_rate": 3.092369477911647e-06, | |
| "loss": 0.6823, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.0627640313820157, | |
| "grad_norm": 4.7329970403721635, | |
| "learning_rate": 3.1124497991967873e-06, | |
| "loss": 0.6183, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.0631663649165158, | |
| "grad_norm": 5.584324231160161, | |
| "learning_rate": 3.132530120481928e-06, | |
| "loss": 0.8407, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.0635686984510159, | |
| "grad_norm": 4.9494988025514095, | |
| "learning_rate": 3.152610441767068e-06, | |
| "loss": 0.7362, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.06397103198551599, | |
| "grad_norm": 4.826336881024485, | |
| "learning_rate": 3.172690763052209e-06, | |
| "loss": 0.6756, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.06437336552001609, | |
| "grad_norm": 4.663584106854257, | |
| "learning_rate": 3.1927710843373494e-06, | |
| "loss": 0.6618, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.06477569905451619, | |
| "grad_norm": 5.358113823802763, | |
| "learning_rate": 3.21285140562249e-06, | |
| "loss": 0.7845, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.06517803258901629, | |
| "grad_norm": 4.393966562734061, | |
| "learning_rate": 3.232931726907631e-06, | |
| "loss": 0.6643, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.06558036612351639, | |
| "grad_norm": 5.269005574739929, | |
| "learning_rate": 3.2530120481927713e-06, | |
| "loss": 0.7892, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.0659826996580165, | |
| "grad_norm": 5.191377211307637, | |
| "learning_rate": 3.2730923694779123e-06, | |
| "loss": 0.6834, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.0663850331925166, | |
| "grad_norm": 5.9411717854682875, | |
| "learning_rate": 3.2931726907630525e-06, | |
| "loss": 0.682, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.0667873667270167, | |
| "grad_norm": 4.657895689681778, | |
| "learning_rate": 3.313253012048193e-06, | |
| "loss": 0.6533, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.0671897002615168, | |
| "grad_norm": 4.689805393308291, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.5851, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.0675920337960169, | |
| "grad_norm": 4.934046837424603, | |
| "learning_rate": 3.3534136546184744e-06, | |
| "loss": 0.7834, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.067994367330517, | |
| "grad_norm": 5.295304511873193, | |
| "learning_rate": 3.3734939759036146e-06, | |
| "loss": 0.6978, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.0683967008650171, | |
| "grad_norm": 5.8805644086916935, | |
| "learning_rate": 3.393574297188755e-06, | |
| "loss": 0.7022, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.0687990343995172, | |
| "grad_norm": 4.986410631963467, | |
| "learning_rate": 3.4136546184738962e-06, | |
| "loss": 0.8589, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.0692013679340173, | |
| "grad_norm": 4.587536055949948, | |
| "learning_rate": 3.4337349397590364e-06, | |
| "loss": 0.5962, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.0696037014685174, | |
| "grad_norm": 4.542521557186459, | |
| "learning_rate": 3.453815261044177e-06, | |
| "loss": 0.5778, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.0700060350030175, | |
| "grad_norm": 5.006237220238328, | |
| "learning_rate": 3.4738955823293173e-06, | |
| "loss": 0.7066, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.0704083685375176, | |
| "grad_norm": 5.03127022104005, | |
| "learning_rate": 3.4939759036144583e-06, | |
| "loss": 0.7143, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.0708107020720177, | |
| "grad_norm": 4.978976609084576, | |
| "learning_rate": 3.5140562248995985e-06, | |
| "loss": 0.5806, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.0712130356065178, | |
| "grad_norm": 4.953415145134539, | |
| "learning_rate": 3.534136546184739e-06, | |
| "loss": 0.6169, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.0716153691410179, | |
| "grad_norm": 5.224274445585009, | |
| "learning_rate": 3.5542168674698798e-06, | |
| "loss": 0.7055, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.072017702675518, | |
| "grad_norm": 4.793231833957601, | |
| "learning_rate": 3.5742971887550204e-06, | |
| "loss": 0.6589, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.07242003621001811, | |
| "grad_norm": 4.897332398174339, | |
| "learning_rate": 3.5943775100401606e-06, | |
| "loss": 0.5835, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.07282236974451821, | |
| "grad_norm": 5.461164108231627, | |
| "learning_rate": 3.6144578313253016e-06, | |
| "loss": 0.6153, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.07322470327901831, | |
| "grad_norm": 4.750713371821981, | |
| "learning_rate": 3.6345381526104423e-06, | |
| "loss": 0.5576, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.07362703681351841, | |
| "grad_norm": 4.612815212986164, | |
| "learning_rate": 3.6546184738955825e-06, | |
| "loss": 0.6695, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.07402937034801851, | |
| "grad_norm": 5.035137988160493, | |
| "learning_rate": 3.6746987951807235e-06, | |
| "loss": 0.6451, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.0744317038825186, | |
| "grad_norm": 5.189253752749726, | |
| "learning_rate": 3.6947791164658637e-06, | |
| "loss": 0.6897, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.0748340374170187, | |
| "grad_norm": 5.061038391462879, | |
| "learning_rate": 3.7148594377510043e-06, | |
| "loss": 0.7363, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.0752363709515188, | |
| "grad_norm": 5.12457801565824, | |
| "learning_rate": 3.7349397590361445e-06, | |
| "loss": 0.6662, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.0756387044860189, | |
| "grad_norm": 5.468566358022457, | |
| "learning_rate": 3.7550200803212856e-06, | |
| "loss": 0.7333, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.07604103802051901, | |
| "grad_norm": 4.83323388980342, | |
| "learning_rate": 3.7751004016064258e-06, | |
| "loss": 0.6179, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.07644337155501911, | |
| "grad_norm": 5.618759075616741, | |
| "learning_rate": 3.7951807228915664e-06, | |
| "loss": 0.7861, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.07684570508951921, | |
| "grad_norm": 5.571721878536531, | |
| "learning_rate": 3.8152610441767074e-06, | |
| "loss": 0.6481, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.07724803862401931, | |
| "grad_norm": 5.067103682358002, | |
| "learning_rate": 3.835341365461848e-06, | |
| "loss": 0.7295, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.07765037215851942, | |
| "grad_norm": 4.945134722999282, | |
| "learning_rate": 3.855421686746989e-06, | |
| "loss": 0.6275, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.07805270569301952, | |
| "grad_norm": 4.995120702157204, | |
| "learning_rate": 3.875502008032129e-06, | |
| "loss": 0.6156, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.07845503922751962, | |
| "grad_norm": 4.936376571967402, | |
| "learning_rate": 3.89558232931727e-06, | |
| "loss": 0.6141, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.07885737276201972, | |
| "grad_norm": 5.014965363677265, | |
| "learning_rate": 3.91566265060241e-06, | |
| "loss": 0.6706, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.07925970629651982, | |
| "grad_norm": 4.379077775855287, | |
| "learning_rate": 3.93574297188755e-06, | |
| "loss": 0.625, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.07966203983101991, | |
| "grad_norm": 5.213065143955326, | |
| "learning_rate": 3.9558232931726905e-06, | |
| "loss": 0.6259, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.08006437336552001, | |
| "grad_norm": 4.943005393732397, | |
| "learning_rate": 3.975903614457832e-06, | |
| "loss": 0.6867, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.08046670690002011, | |
| "grad_norm": 6.1486973779873795, | |
| "learning_rate": 3.995983935742972e-06, | |
| "loss": 0.8347, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.08086904043452021, | |
| "grad_norm": 4.733552314219692, | |
| "learning_rate": 4.016064257028113e-06, | |
| "loss": 0.6611, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.08127137396902032, | |
| "grad_norm": 4.8269187112270195, | |
| "learning_rate": 4.036144578313254e-06, | |
| "loss": 0.6939, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.08167370750352042, | |
| "grad_norm": 5.3439700963806125, | |
| "learning_rate": 4.056224899598394e-06, | |
| "loss": 0.7133, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.08207604103802052, | |
| "grad_norm": 5.29998702305394, | |
| "learning_rate": 4.076305220883534e-06, | |
| "loss": 0.7792, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.08247837457252062, | |
| "grad_norm": 5.408216152846704, | |
| "learning_rate": 4.096385542168675e-06, | |
| "loss": 0.6411, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.08288070810702072, | |
| "grad_norm": 4.419630999712613, | |
| "learning_rate": 4.1164658634538155e-06, | |
| "loss": 0.615, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.08328304164152082, | |
| "grad_norm": 4.543564292966689, | |
| "learning_rate": 4.136546184738956e-06, | |
| "loss": 0.5462, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.08368537517602093, | |
| "grad_norm": 4.814136944508732, | |
| "learning_rate": 4.156626506024097e-06, | |
| "loss": 0.6407, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.08408770871052103, | |
| "grad_norm": 5.118958059055448, | |
| "learning_rate": 4.176706827309237e-06, | |
| "loss": 0.6922, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.08449004224502112, | |
| "grad_norm": 5.761968321529592, | |
| "learning_rate": 4.196787148594378e-06, | |
| "loss": 0.7785, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.08489237577952122, | |
| "grad_norm": 4.7973887668760185, | |
| "learning_rate": 4.216867469879519e-06, | |
| "loss": 0.5855, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.08529470931402132, | |
| "grad_norm": 5.063119851943765, | |
| "learning_rate": 4.236947791164659e-06, | |
| "loss": 0.5061, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.08569704284852142, | |
| "grad_norm": 5.185457141038217, | |
| "learning_rate": 4.2570281124497995e-06, | |
| "loss": 0.6547, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.08609937638302152, | |
| "grad_norm": 4.908063923355144, | |
| "learning_rate": 4.27710843373494e-06, | |
| "loss": 0.6325, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.08650170991752162, | |
| "grad_norm": 4.902236509892525, | |
| "learning_rate": 4.297188755020081e-06, | |
| "loss": 0.771, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.08690404345202173, | |
| "grad_norm": 4.939421470208826, | |
| "learning_rate": 4.317269076305221e-06, | |
| "loss": 0.6009, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.08730637698652183, | |
| "grad_norm": 5.585627020639388, | |
| "learning_rate": 4.337349397590362e-06, | |
| "loss": 0.75, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.08770871052102193, | |
| "grad_norm": 5.768733340993033, | |
| "learning_rate": 4.357429718875502e-06, | |
| "loss": 0.8889, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.08811104405552203, | |
| "grad_norm": 4.756073859984964, | |
| "learning_rate": 4.377510040160643e-06, | |
| "loss": 0.5845, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.08851337759002213, | |
| "grad_norm": 5.602957070495176, | |
| "learning_rate": 4.397590361445783e-06, | |
| "loss": 0.5766, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.08891571112452223, | |
| "grad_norm": 5.51373108651177, | |
| "learning_rate": 4.4176706827309244e-06, | |
| "loss": 0.667, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.08931804465902234, | |
| "grad_norm": 5.2818421493270895, | |
| "learning_rate": 4.437751004016065e-06, | |
| "loss": 0.6132, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.08972037819352242, | |
| "grad_norm": 4.687481374933495, | |
| "learning_rate": 4.457831325301205e-06, | |
| "loss": 0.69, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.09012271172802253, | |
| "grad_norm": 5.045402036516882, | |
| "learning_rate": 4.477911646586346e-06, | |
| "loss": 0.5496, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.09052504526252263, | |
| "grad_norm": 4.949655367217769, | |
| "learning_rate": 4.497991967871486e-06, | |
| "loss": 0.6777, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.09092737879702273, | |
| "grad_norm": 4.725446380096972, | |
| "learning_rate": 4.518072289156627e-06, | |
| "loss": 0.5211, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.09132971233152283, | |
| "grad_norm": 5.627747083860127, | |
| "learning_rate": 4.538152610441767e-06, | |
| "loss": 0.7335, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.09173204586602293, | |
| "grad_norm": 4.658915831199445, | |
| "learning_rate": 4.558232931726908e-06, | |
| "loss": 0.618, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.09213437940052303, | |
| "grad_norm": 5.169380307261355, | |
| "learning_rate": 4.578313253012049e-06, | |
| "loss": 0.8083, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.09253671293502314, | |
| "grad_norm": 4.462280637097748, | |
| "learning_rate": 4.598393574297189e-06, | |
| "loss": 0.5873, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.09293904646952324, | |
| "grad_norm": 5.088180514844276, | |
| "learning_rate": 4.61847389558233e-06, | |
| "loss": 0.6948, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.09334138000402334, | |
| "grad_norm": 4.911645340461795, | |
| "learning_rate": 4.63855421686747e-06, | |
| "loss": 0.5014, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.09374371353852344, | |
| "grad_norm": 5.183650867648958, | |
| "learning_rate": 4.658634538152611e-06, | |
| "loss": 0.6731, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.09414604707302354, | |
| "grad_norm": 4.942647346081943, | |
| "learning_rate": 4.678714859437751e-06, | |
| "loss": 0.796, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.09454838060752364, | |
| "grad_norm": 5.413754367435916, | |
| "learning_rate": 4.698795180722892e-06, | |
| "loss": 0.7029, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.09495071414202373, | |
| "grad_norm": 5.394996509930947, | |
| "learning_rate": 4.7188755020080325e-06, | |
| "loss": 0.7448, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.09535304767652383, | |
| "grad_norm": 4.807099705141704, | |
| "learning_rate": 4.7389558232931736e-06, | |
| "loss": 0.6608, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.09575538121102393, | |
| "grad_norm": 5.204336077501797, | |
| "learning_rate": 4.759036144578314e-06, | |
| "loss": 0.585, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.09615771474552404, | |
| "grad_norm": 4.845053158029385, | |
| "learning_rate": 4.779116465863454e-06, | |
| "loss": 0.6299, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.09656004828002414, | |
| "grad_norm": 5.037832345926416, | |
| "learning_rate": 4.799196787148594e-06, | |
| "loss": 0.6598, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.09696238181452424, | |
| "grad_norm": 4.930571962391263, | |
| "learning_rate": 4.819277108433735e-06, | |
| "loss": 0.5918, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.09736471534902434, | |
| "grad_norm": 4.749158619882368, | |
| "learning_rate": 4.839357429718876e-06, | |
| "loss": 0.5785, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.09776704888352444, | |
| "grad_norm": 5.774299138017609, | |
| "learning_rate": 4.8594377510040165e-06, | |
| "loss": 0.7465, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.09816938241802455, | |
| "grad_norm": 4.582929546310413, | |
| "learning_rate": 4.8795180722891575e-06, | |
| "loss": 0.5464, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.09857171595252465, | |
| "grad_norm": 4.604283671045787, | |
| "learning_rate": 4.899598393574298e-06, | |
| "loss": 0.6183, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.09897404948702475, | |
| "grad_norm": 5.126092092509994, | |
| "learning_rate": 4.919678714859438e-06, | |
| "loss": 0.7267, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.09937638302152485, | |
| "grad_norm": 4.6087281951772345, | |
| "learning_rate": 4.939759036144578e-06, | |
| "loss": 0.5911, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.09977871655602494, | |
| "grad_norm": 4.4308954136706165, | |
| "learning_rate": 4.959839357429719e-06, | |
| "loss": 0.6525, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.10018105009052504, | |
| "grad_norm": 4.704767612087696, | |
| "learning_rate": 4.979919678714859e-06, | |
| "loss": 0.7537, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.10058338362502514, | |
| "grad_norm": 5.171569719861641, | |
| "learning_rate": 5e-06, | |
| "loss": 0.5941, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.10098571715952524, | |
| "grad_norm": 5.7449160884679245, | |
| "learning_rate": 4.99999753465491e-06, | |
| "loss": 0.6686, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.10138805069402534, | |
| "grad_norm": 4.648308764039487, | |
| "learning_rate": 4.999990138624503e-06, | |
| "loss": 0.6264, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.10179038422852545, | |
| "grad_norm": 5.064339952308565, | |
| "learning_rate": 4.999977811923365e-06, | |
| "loss": 0.6921, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.10219271776302555, | |
| "grad_norm": 4.810326779178174, | |
| "learning_rate": 4.9999605545758065e-06, | |
| "loss": 0.6119, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.10259505129752565, | |
| "grad_norm": 4.805314994215823, | |
| "learning_rate": 4.999938366615867e-06, | |
| "loss": 0.6364, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.10299738483202575, | |
| "grad_norm": 4.473992853756316, | |
| "learning_rate": 4.999911248087305e-06, | |
| "loss": 0.5508, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.10339971836652585, | |
| "grad_norm": 5.452559581429947, | |
| "learning_rate": 4.999879199043606e-06, | |
| "loss": 0.7831, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.10380205190102595, | |
| "grad_norm": 5.061524793864594, | |
| "learning_rate": 4.99984221954798e-06, | |
| "loss": 0.5644, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.10420438543552606, | |
| "grad_norm": 4.673975167806026, | |
| "learning_rate": 4.999800309673361e-06, | |
| "loss": 0.649, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.10460671897002616, | |
| "grad_norm": 4.8181768852216855, | |
| "learning_rate": 4.999753469502406e-06, | |
| "loss": 0.6528, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.10500905250452625, | |
| "grad_norm": 5.3922298074237345, | |
| "learning_rate": 4.999701699127497e-06, | |
| "loss": 0.8216, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.10541138603902635, | |
| "grad_norm": 4.463138014998754, | |
| "learning_rate": 4.99964499865074e-06, | |
| "loss": 0.6824, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.10581371957352645, | |
| "grad_norm": 4.937246753810877, | |
| "learning_rate": 4.999583368183965e-06, | |
| "loss": 0.5256, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.10621605310802655, | |
| "grad_norm": 5.02716535200141, | |
| "learning_rate": 4.999516807848721e-06, | |
| "loss": 0.6658, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.10661838664252665, | |
| "grad_norm": 4.719903057858775, | |
| "learning_rate": 4.999445317776286e-06, | |
| "loss": 0.5943, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.10702072017702675, | |
| "grad_norm": 4.327891289019763, | |
| "learning_rate": 4.9993688981076574e-06, | |
| "loss": 0.6267, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.10742305371152686, | |
| "grad_norm": 5.04792717283726, | |
| "learning_rate": 4.999287548993555e-06, | |
| "loss": 0.7649, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.10782538724602696, | |
| "grad_norm": 4.398384896034094, | |
| "learning_rate": 4.999201270594424e-06, | |
| "loss": 0.6237, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.10822772078052706, | |
| "grad_norm": 4.513204332172601, | |
| "learning_rate": 4.999110063080428e-06, | |
| "loss": 0.685, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.10863005431502716, | |
| "grad_norm": 4.910504259364484, | |
| "learning_rate": 4.999013926631453e-06, | |
| "loss": 0.6393, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.10903238784952726, | |
| "grad_norm": 3.8051932823856927, | |
| "learning_rate": 4.998912861437106e-06, | |
| "loss": 0.557, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.10943472138402736, | |
| "grad_norm": 4.261798228362777, | |
| "learning_rate": 4.9988068676967176e-06, | |
| "loss": 0.6685, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.10983705491852747, | |
| "grad_norm": 5.199991754679721, | |
| "learning_rate": 4.9986959456193345e-06, | |
| "loss": 0.6926, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.11023938845302755, | |
| "grad_norm": 4.800143443642004, | |
| "learning_rate": 4.998580095423728e-06, | |
| "loss": 0.6262, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.11064172198752766, | |
| "grad_norm": 5.703676474195908, | |
| "learning_rate": 4.998459317338383e-06, | |
| "loss": 0.8102, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.11104405552202776, | |
| "grad_norm": 5.554521472398849, | |
| "learning_rate": 4.998333611601511e-06, | |
| "loss": 0.8502, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.11144638905652786, | |
| "grad_norm": 5.223628882218702, | |
| "learning_rate": 4.998202978461038e-06, | |
| "loss": 0.6968, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.11184872259102796, | |
| "grad_norm": 5.028503778823592, | |
| "learning_rate": 4.9980674181746055e-06, | |
| "loss": 0.6237, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.11225105612552806, | |
| "grad_norm": 4.810062581233454, | |
| "learning_rate": 4.997926931009579e-06, | |
| "loss": 0.6566, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.11265338966002816, | |
| "grad_norm": 4.895795240240787, | |
| "learning_rate": 4.997781517243037e-06, | |
| "loss": 0.6358, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.11305572319452827, | |
| "grad_norm": 4.678689813671165, | |
| "learning_rate": 4.997631177161775e-06, | |
| "loss": 0.7098, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.11345805672902837, | |
| "grad_norm": 4.8363206866892465, | |
| "learning_rate": 4.9974759110623065e-06, | |
| "loss": 0.7234, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.11386039026352847, | |
| "grad_norm": 4.872936830415585, | |
| "learning_rate": 4.997315719250857e-06, | |
| "loss": 0.5937, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.11426272379802857, | |
| "grad_norm": 4.28046131366056, | |
| "learning_rate": 4.997150602043371e-06, | |
| "loss": 0.6211, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.11466505733252867, | |
| "grad_norm": 4.837073628599811, | |
| "learning_rate": 4.996980559765504e-06, | |
| "loss": 0.5828, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.11506739086702877, | |
| "grad_norm": 4.889286615585772, | |
| "learning_rate": 4.996805592752626e-06, | |
| "loss": 0.7301, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.11546972440152886, | |
| "grad_norm": 3.885336153971243, | |
| "learning_rate": 4.996625701349822e-06, | |
| "loss": 0.5091, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.11587205793602896, | |
| "grad_norm": 4.738737155243486, | |
| "learning_rate": 4.996440885911886e-06, | |
| "loss": 0.5519, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.11627439147052906, | |
| "grad_norm": 4.600896924297193, | |
| "learning_rate": 4.996251146803326e-06, | |
| "loss": 0.5357, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.11667672500502917, | |
| "grad_norm": 4.348185410245171, | |
| "learning_rate": 4.996056484398359e-06, | |
| "loss": 0.606, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.11707905853952927, | |
| "grad_norm": 4.609344088685658, | |
| "learning_rate": 4.995856899080914e-06, | |
| "loss": 0.7079, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.11748139207402937, | |
| "grad_norm": 4.982538761307109, | |
| "learning_rate": 4.9956523912446274e-06, | |
| "loss": 0.6626, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.11788372560852947, | |
| "grad_norm": 4.958292330122082, | |
| "learning_rate": 4.9954429612928455e-06, | |
| "loss": 0.5589, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.11828605914302957, | |
| "grad_norm": 5.3063992889234015, | |
| "learning_rate": 4.995228609638623e-06, | |
| "loss": 0.6761, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.11868839267752967, | |
| "grad_norm": 4.787491132159558, | |
| "learning_rate": 4.995009336704718e-06, | |
| "loss": 0.6907, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.11909072621202978, | |
| "grad_norm": 5.042532894966667, | |
| "learning_rate": 4.994785142923599e-06, | |
| "loss": 0.7259, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.11949305974652988, | |
| "grad_norm": 5.24654264942669, | |
| "learning_rate": 4.994556028737439e-06, | |
| "loss": 0.7376, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.11989539328102998, | |
| "grad_norm": 4.7381264053196475, | |
| "learning_rate": 4.9943219945981126e-06, | |
| "loss": 0.6419, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.12029772681553007, | |
| "grad_norm": 4.174895491105691, | |
| "learning_rate": 4.9940830409672e-06, | |
| "loss": 0.5846, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.12070006035003017, | |
| "grad_norm": 5.265350639824991, | |
| "learning_rate": 4.993839168315985e-06, | |
| "loss": 0.7269, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.12110239388453027, | |
| "grad_norm": 5.293670654130999, | |
| "learning_rate": 4.99359037712545e-06, | |
| "loss": 0.7539, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.12150472741903037, | |
| "grad_norm": 4.832726818218027, | |
| "learning_rate": 4.993336667886281e-06, | |
| "loss": 0.6617, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.12190706095353047, | |
| "grad_norm": 4.982756198499471, | |
| "learning_rate": 4.993078041098862e-06, | |
| "loss": 0.6517, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.12230939448803058, | |
| "grad_norm": 4.787146336142149, | |
| "learning_rate": 4.9928144972732785e-06, | |
| "loss": 0.5943, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.12271172802253068, | |
| "grad_norm": 5.725699716607812, | |
| "learning_rate": 4.992546036929309e-06, | |
| "loss": 0.7385, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.12311406155703078, | |
| "grad_norm": 4.388246603675133, | |
| "learning_rate": 4.992272660596432e-06, | |
| "loss": 0.5268, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.12351639509153088, | |
| "grad_norm": 4.323516644235707, | |
| "learning_rate": 4.991994368813823e-06, | |
| "loss": 0.5069, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.12391872862603098, | |
| "grad_norm": 4.968139714274168, | |
| "learning_rate": 4.991711162130347e-06, | |
| "loss": 0.6089, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.12432106216053108, | |
| "grad_norm": 5.393689160165955, | |
| "learning_rate": 4.99142304110457e-06, | |
| "loss": 0.5749, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.12472339569503119, | |
| "grad_norm": 5.010889684729688, | |
| "learning_rate": 4.991130006304742e-06, | |
| "loss": 0.6868, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.12512572922953127, | |
| "grad_norm": 4.77584804740267, | |
| "learning_rate": 4.99083205830881e-06, | |
| "loss": 0.7153, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.1255280627640314, | |
| "grad_norm": 3.8671259573453343, | |
| "learning_rate": 4.990529197704411e-06, | |
| "loss": 0.4938, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.12593039629853148, | |
| "grad_norm": 4.878545701273808, | |
| "learning_rate": 4.990221425088868e-06, | |
| "loss": 0.5954, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.1263327298330316, | |
| "grad_norm": 4.481594062259529, | |
| "learning_rate": 4.989908741069195e-06, | |
| "loss": 0.6397, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.12673506336753168, | |
| "grad_norm": 4.317626894827104, | |
| "learning_rate": 4.98959114626209e-06, | |
| "loss": 0.609, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.1271373969020318, | |
| "grad_norm": 4.415809282581258, | |
| "learning_rate": 4.989268641293939e-06, | |
| "loss": 0.5092, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.12753973043653188, | |
| "grad_norm": 4.4424914565481375, | |
| "learning_rate": 4.9889412268008096e-06, | |
| "loss": 0.6316, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.12794206397103197, | |
| "grad_norm": 4.431277408902586, | |
| "learning_rate": 4.988608903428454e-06, | |
| "loss": 0.6015, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.1283443975055321, | |
| "grad_norm": 4.618861534462686, | |
| "learning_rate": 4.988271671832305e-06, | |
| "loss": 0.5562, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.12874673104003218, | |
| "grad_norm": 5.078731601531793, | |
| "learning_rate": 4.987929532677478e-06, | |
| "loss": 0.7308, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.1291490645745323, | |
| "grad_norm": 4.814095780713649, | |
| "learning_rate": 4.987582486638763e-06, | |
| "loss": 0.6856, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.12955139810903238, | |
| "grad_norm": 5.171953766801144, | |
| "learning_rate": 4.987230534400634e-06, | |
| "loss": 0.7413, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.1299537316435325, | |
| "grad_norm": 4.527962518284751, | |
| "learning_rate": 4.986873676657237e-06, | |
| "loss": 0.5227, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.13035606517803258, | |
| "grad_norm": 4.9194193843235405, | |
| "learning_rate": 4.986511914112392e-06, | |
| "loss": 0.6038, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.1307583987125327, | |
| "grad_norm": 4.403379209157847, | |
| "learning_rate": 4.986145247479597e-06, | |
| "loss": 0.6624, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.13116073224703279, | |
| "grad_norm": 4.784759384702467, | |
| "learning_rate": 4.985773677482018e-06, | |
| "loss": 0.7109, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.1315630657815329, | |
| "grad_norm": 4.928746115672455, | |
| "learning_rate": 4.985397204852496e-06, | |
| "loss": 0.6241, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.131965399316033, | |
| "grad_norm": 5.6685753103245755, | |
| "learning_rate": 4.985015830333536e-06, | |
| "loss": 0.7121, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.1323677328505331, | |
| "grad_norm": 4.723811520101061, | |
| "learning_rate": 4.984629554677316e-06, | |
| "loss": 0.5683, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.1327700663850332, | |
| "grad_norm": 4.106210405752034, | |
| "learning_rate": 4.984238378645677e-06, | |
| "loss": 0.6128, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.13317239991953328, | |
| "grad_norm": 5.439987884236508, | |
| "learning_rate": 4.983842303010127e-06, | |
| "loss": 0.7294, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.1335747334540334, | |
| "grad_norm": 4.76281391948892, | |
| "learning_rate": 4.9834413285518365e-06, | |
| "loss": 0.6116, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.13397706698853348, | |
| "grad_norm": 4.594103270680001, | |
| "learning_rate": 4.983035456061637e-06, | |
| "loss": 0.5509, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.1343794005230336, | |
| "grad_norm": 4.8229418474364785, | |
| "learning_rate": 4.982624686340022e-06, | |
| "loss": 0.6565, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.1347817340575337, | |
| "grad_norm": 4.572125613909981, | |
| "learning_rate": 4.9822090201971414e-06, | |
| "loss": 0.5739, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.1351840675920338, | |
| "grad_norm": 4.657722588184461, | |
| "learning_rate": 4.981788458452806e-06, | |
| "loss": 0.716, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.1355864011265339, | |
| "grad_norm": 4.485714819813409, | |
| "learning_rate": 4.9813630019364765e-06, | |
| "loss": 0.5809, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.135988734661034, | |
| "grad_norm": 4.730520981461843, | |
| "learning_rate": 4.9809326514872735e-06, | |
| "loss": 0.6006, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.1363910681955341, | |
| "grad_norm": 4.407751381335739, | |
| "learning_rate": 4.980497407953966e-06, | |
| "loss": 0.6979, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.1367934017300342, | |
| "grad_norm": 5.036488553382197, | |
| "learning_rate": 4.980057272194973e-06, | |
| "loss": 0.6719, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1371957352645343, | |
| "grad_norm": 4.131020183485464, | |
| "learning_rate": 4.979612245078366e-06, | |
| "loss": 0.656, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.1375980687990344, | |
| "grad_norm": 4.494385344326802, | |
| "learning_rate": 4.979162327481859e-06, | |
| "loss": 0.5824, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.1380004023335345, | |
| "grad_norm": 4.887767115382313, | |
| "learning_rate": 4.9787075202928155e-06, | |
| "loss": 0.7065, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.1384027358680346, | |
| "grad_norm": 4.929080555596624, | |
| "learning_rate": 4.97824782440824e-06, | |
| "loss": 0.6462, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.1388050694025347, | |
| "grad_norm": 4.768200287000246, | |
| "learning_rate": 4.97778324073478e-06, | |
| "loss": 0.7052, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.1392074029370348, | |
| "grad_norm": 4.824866676754677, | |
| "learning_rate": 4.977313770188723e-06, | |
| "loss": 0.6169, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.1396097364715349, | |
| "grad_norm": 5.0730067342381515, | |
| "learning_rate": 4.976839413695994e-06, | |
| "loss": 0.6907, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.140012070006035, | |
| "grad_norm": 4.400048281159567, | |
| "learning_rate": 4.976360172192156e-06, | |
| "loss": 0.6843, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.1404144035405351, | |
| "grad_norm": 4.826940942258961, | |
| "learning_rate": 4.975876046622404e-06, | |
| "loss": 0.6425, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.1408167370750352, | |
| "grad_norm": 4.1980516292894645, | |
| "learning_rate": 4.975387037941568e-06, | |
| "loss": 0.513, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.1412190706095353, | |
| "grad_norm": 4.599176289887721, | |
| "learning_rate": 4.974893147114108e-06, | |
| "loss": 0.6138, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.1416214041440354, | |
| "grad_norm": 5.224094683564219, | |
| "learning_rate": 4.974394375114113e-06, | |
| "loss": 0.7688, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.14202373767853552, | |
| "grad_norm": 4.886795665659055, | |
| "learning_rate": 4.9738907229253e-06, | |
| "loss": 0.6447, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.1424260712130356, | |
| "grad_norm": 4.686162997153853, | |
| "learning_rate": 4.973382191541008e-06, | |
| "loss": 0.6489, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.14282840474753572, | |
| "grad_norm": 4.633889069385792, | |
| "learning_rate": 4.972868781964204e-06, | |
| "loss": 0.6841, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.1432307382820358, | |
| "grad_norm": 4.714123893258144, | |
| "learning_rate": 4.972350495207472e-06, | |
| "loss": 0.7282, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.1436330718165359, | |
| "grad_norm": 4.620909154154889, | |
| "learning_rate": 4.971827332293017e-06, | |
| "loss": 0.6267, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.144035405351036, | |
| "grad_norm": 5.112805770554144, | |
| "learning_rate": 4.971299294252659e-06, | |
| "loss": 0.7187, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.1444377388855361, | |
| "grad_norm": 4.491917871078975, | |
| "learning_rate": 4.970766382127838e-06, | |
| "loss": 0.6243, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.14484007242003621, | |
| "grad_norm": 4.91569774829319, | |
| "learning_rate": 4.9702285969696006e-06, | |
| "loss": 0.6044, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.1452424059545363, | |
| "grad_norm": 4.347791813311231, | |
| "learning_rate": 4.96968593983861e-06, | |
| "loss": 0.6496, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.14564473948903642, | |
| "grad_norm": 4.6534811500518645, | |
| "learning_rate": 4.9691384118051346e-06, | |
| "loss": 0.7414, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.1460470730235365, | |
| "grad_norm": 4.5918592667861455, | |
| "learning_rate": 4.968586013949051e-06, | |
| "loss": 0.5802, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.14644940655803662, | |
| "grad_norm": 4.435482175102624, | |
| "learning_rate": 4.96802874735984e-06, | |
| "loss": 0.5162, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.1468517400925367, | |
| "grad_norm": 4.407001515831722, | |
| "learning_rate": 4.967466613136586e-06, | |
| "loss": 0.5361, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.14725407362703682, | |
| "grad_norm": 4.369142207445966, | |
| "learning_rate": 4.966899612387972e-06, | |
| "loss": 0.664, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.1476564071615369, | |
| "grad_norm": 4.607767758241303, | |
| "learning_rate": 4.966327746232281e-06, | |
| "loss": 0.5882, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.14805874069603703, | |
| "grad_norm": 4.938226519089643, | |
| "learning_rate": 4.96575101579739e-06, | |
| "loss": 0.5841, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.14846107423053712, | |
| "grad_norm": 4.096479564143721, | |
| "learning_rate": 4.965169422220771e-06, | |
| "loss": 0.5691, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.1488634077650372, | |
| "grad_norm": 4.775093512712085, | |
| "learning_rate": 4.964582966649488e-06, | |
| "loss": 0.6063, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.14926574129953732, | |
| "grad_norm": 4.211658294897708, | |
| "learning_rate": 4.963991650240192e-06, | |
| "loss": 0.5215, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.1496680748340374, | |
| "grad_norm": 4.364777540143213, | |
| "learning_rate": 4.963395474159122e-06, | |
| "loss": 0.4866, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.15007040836853752, | |
| "grad_norm": 4.681087632312734, | |
| "learning_rate": 4.962794439582102e-06, | |
| "loss": 0.6122, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.1504727419030376, | |
| "grad_norm": 5.077820479708522, | |
| "learning_rate": 4.9621885476945405e-06, | |
| "loss": 0.7555, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.15087507543753773, | |
| "grad_norm": 5.978600203961224, | |
| "learning_rate": 4.961577799691421e-06, | |
| "loss": 0.7113, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.1512774089720378, | |
| "grad_norm": 5.376960173895862, | |
| "learning_rate": 4.960962196777307e-06, | |
| "loss": 0.577, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.15167974250653793, | |
| "grad_norm": 5.18055770132837, | |
| "learning_rate": 4.960341740166338e-06, | |
| "loss": 0.6625, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.15208207604103802, | |
| "grad_norm": 4.460009104960063, | |
| "learning_rate": 4.959716431082227e-06, | |
| "loss": 0.5306, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.15248440957553813, | |
| "grad_norm": 4.303191111673615, | |
| "learning_rate": 4.959086270758255e-06, | |
| "loss": 0.5604, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.15288674311003822, | |
| "grad_norm": 4.8364861459301185, | |
| "learning_rate": 4.9584512604372704e-06, | |
| "loss": 0.6154, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.15328907664453834, | |
| "grad_norm": 4.638041078223287, | |
| "learning_rate": 4.957811401371692e-06, | |
| "loss": 0.5852, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.15369141017903842, | |
| "grad_norm": 4.8155087041398295, | |
| "learning_rate": 4.9571666948234975e-06, | |
| "loss": 0.591, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.1540937437135385, | |
| "grad_norm": 4.365724907978, | |
| "learning_rate": 4.956517142064226e-06, | |
| "loss": 0.5645, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.15449607724803863, | |
| "grad_norm": 4.800750875035706, | |
| "learning_rate": 4.955862744374974e-06, | |
| "loss": 0.6958, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.15489841078253871, | |
| "grad_norm": 4.218913109898912, | |
| "learning_rate": 4.9552035030463955e-06, | |
| "loss": 0.5981, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.15530074431703883, | |
| "grad_norm": 3.8608056472996775, | |
| "learning_rate": 4.954539419378695e-06, | |
| "loss": 0.5516, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.15570307785153892, | |
| "grad_norm": 4.422539247772956, | |
| "learning_rate": 4.953870494681631e-06, | |
| "loss": 0.466, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.15610541138603903, | |
| "grad_norm": 5.389694964679017, | |
| "learning_rate": 4.9531967302745065e-06, | |
| "loss": 0.7187, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.15650774492053912, | |
| "grad_norm": 4.641539036731872, | |
| "learning_rate": 4.952518127486171e-06, | |
| "loss": 0.6118, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.15691007845503924, | |
| "grad_norm": 4.38792439545412, | |
| "learning_rate": 4.951834687655016e-06, | |
| "loss": 0.6125, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.15731241198953932, | |
| "grad_norm": 4.859276712096591, | |
| "learning_rate": 4.9511464121289745e-06, | |
| "loss": 0.789, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.15771474552403944, | |
| "grad_norm": 4.123781252290954, | |
| "learning_rate": 4.950453302265516e-06, | |
| "loss": 0.5189, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.15811707905853953, | |
| "grad_norm": 4.5665024165035915, | |
| "learning_rate": 4.949755359431643e-06, | |
| "loss": 0.5843, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.15851941259303964, | |
| "grad_norm": 4.554644389766188, | |
| "learning_rate": 4.949052585003892e-06, | |
| "loss": 0.5165, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.15892174612753973, | |
| "grad_norm": 4.652445776343059, | |
| "learning_rate": 4.9483449803683295e-06, | |
| "loss": 0.6496, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.15932407966203982, | |
| "grad_norm": 4.155340040403773, | |
| "learning_rate": 4.947632546920545e-06, | |
| "loss": 0.5823, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.15972641319653993, | |
| "grad_norm": 5.685373741993176, | |
| "learning_rate": 4.946915286065656e-06, | |
| "loss": 0.7257, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.16012874673104002, | |
| "grad_norm": 3.794107938623536, | |
| "learning_rate": 4.946193199218298e-06, | |
| "loss": 0.5539, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.16053108026554014, | |
| "grad_norm": 3.345825002647911, | |
| "learning_rate": 4.945466287802625e-06, | |
| "loss": 0.4522, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.16093341380004023, | |
| "grad_norm": 4.499068362412442, | |
| "learning_rate": 4.944734553252308e-06, | |
| "loss": 0.7294, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.16133574733454034, | |
| "grad_norm": 4.710035321209727, | |
| "learning_rate": 4.94399799701053e-06, | |
| "loss": 0.6495, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.16173808086904043, | |
| "grad_norm": 4.665060195725978, | |
| "learning_rate": 4.943256620529983e-06, | |
| "loss": 0.5549, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.16214041440354054, | |
| "grad_norm": 4.526568640183799, | |
| "learning_rate": 4.942510425272864e-06, | |
| "loss": 0.5342, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.16254274793804063, | |
| "grad_norm": 4.500912340574114, | |
| "learning_rate": 4.941759412710878e-06, | |
| "loss": 0.5154, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.16294508147254075, | |
| "grad_norm": 4.871395743184042, | |
| "learning_rate": 4.941003584325229e-06, | |
| "loss": 0.7273, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.16334741500704084, | |
| "grad_norm": 5.226443625458406, | |
| "learning_rate": 4.940242941606619e-06, | |
| "loss": 0.5695, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.16374974854154092, | |
| "grad_norm": 4.3424277680190215, | |
| "learning_rate": 4.939477486055245e-06, | |
| "loss": 0.6903, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.16415208207604104, | |
| "grad_norm": 4.63055317361875, | |
| "learning_rate": 4.938707219180797e-06, | |
| "loss": 0.7724, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.16455441561054113, | |
| "grad_norm": 4.632025607743007, | |
| "learning_rate": 4.9379321425024545e-06, | |
| "loss": 0.5579, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.16495674914504124, | |
| "grad_norm": 4.949408477476733, | |
| "learning_rate": 4.9371522575488814e-06, | |
| "loss": 0.5585, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.16535908267954133, | |
| "grad_norm": 4.82833151193623, | |
| "learning_rate": 4.936367565858228e-06, | |
| "loss": 0.6281, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.16576141621404145, | |
| "grad_norm": 4.135170899195267, | |
| "learning_rate": 4.935578068978121e-06, | |
| "loss": 0.5236, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.16616374974854153, | |
| "grad_norm": 4.848865820547502, | |
| "learning_rate": 4.9347837684656675e-06, | |
| "loss": 0.7572, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.16656608328304165, | |
| "grad_norm": 4.717705787081838, | |
| "learning_rate": 4.933984665887447e-06, | |
| "loss": 0.7282, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.16696841681754174, | |
| "grad_norm": 4.528215697412672, | |
| "learning_rate": 4.93318076281951e-06, | |
| "loss": 0.5974, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.16737075035204185, | |
| "grad_norm": 5.165943035201723, | |
| "learning_rate": 4.9323720608473754e-06, | |
| "loss": 0.6622, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.16777308388654194, | |
| "grad_norm": 4.244718678666535, | |
| "learning_rate": 4.9315585615660275e-06, | |
| "loss": 0.5827, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.16817541742104206, | |
| "grad_norm": 5.152052375848677, | |
| "learning_rate": 4.930740266579911e-06, | |
| "loss": 0.6625, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.16857775095554214, | |
| "grad_norm": 4.55196067766976, | |
| "learning_rate": 4.92991717750293e-06, | |
| "loss": 0.5734, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.16898008449004223, | |
| "grad_norm": 5.130317188995626, | |
| "learning_rate": 4.929089295958442e-06, | |
| "loss": 0.6448, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.16938241802454235, | |
| "grad_norm": 4.221455100939359, | |
| "learning_rate": 4.928256623579259e-06, | |
| "loss": 0.5736, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.16978475155904243, | |
| "grad_norm": 5.042585074077865, | |
| "learning_rate": 4.9274191620076405e-06, | |
| "loss": 0.6948, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.17018708509354255, | |
| "grad_norm": 4.475716088239217, | |
| "learning_rate": 4.926576912895292e-06, | |
| "loss": 0.5762, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.17058941862804264, | |
| "grad_norm": 4.629681308629434, | |
| "learning_rate": 4.9257298779033615e-06, | |
| "loss": 0.7584, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.17099175216254275, | |
| "grad_norm": 4.624861162695507, | |
| "learning_rate": 4.924878058702436e-06, | |
| "loss": 0.6725, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.17139408569704284, | |
| "grad_norm": 4.729048209294549, | |
| "learning_rate": 4.9240214569725375e-06, | |
| "loss": 0.7722, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.17179641923154296, | |
| "grad_norm": 4.21954665313362, | |
| "learning_rate": 4.923160074403122e-06, | |
| "loss": 0.6253, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.17219875276604305, | |
| "grad_norm": 3.755869045635417, | |
| "learning_rate": 4.922293912693072e-06, | |
| "loss": 0.4626, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.17260108630054316, | |
| "grad_norm": 4.263780577574945, | |
| "learning_rate": 4.921422973550699e-06, | |
| "loss": 0.695, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.17300341983504325, | |
| "grad_norm": 5.108523767367576, | |
| "learning_rate": 4.920547258693735e-06, | |
| "loss": 0.7092, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.17340575336954336, | |
| "grad_norm": 4.317384116136861, | |
| "learning_rate": 4.919666769849332e-06, | |
| "loss": 0.4564, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.17380808690404345, | |
| "grad_norm": 5.268931042465572, | |
| "learning_rate": 4.918781508754057e-06, | |
| "loss": 0.7064, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.17421042043854354, | |
| "grad_norm": 4.295948103968784, | |
| "learning_rate": 4.9178914771538875e-06, | |
| "loss": 0.6892, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.17461275397304366, | |
| "grad_norm": 4.214776194153066, | |
| "learning_rate": 4.916996676804214e-06, | |
| "loss": 0.5877, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.17501508750754374, | |
| "grad_norm": 4.187051473349894, | |
| "learning_rate": 4.916097109469829e-06, | |
| "loss": 0.5542, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.17541742104204386, | |
| "grad_norm": 4.7370251117595235, | |
| "learning_rate": 4.915192776924926e-06, | |
| "loss": 0.6131, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.17581975457654395, | |
| "grad_norm": 4.790572516699059, | |
| "learning_rate": 4.9142836809531e-06, | |
| "loss": 0.6847, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.17622208811104406, | |
| "grad_norm": 4.024460804862858, | |
| "learning_rate": 4.91336982334734e-06, | |
| "loss": 0.4475, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.17662442164554415, | |
| "grad_norm": 4.675530435693111, | |
| "learning_rate": 4.912451205910024e-06, | |
| "loss": 0.641, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.17702675518004427, | |
| "grad_norm": 4.070435859439258, | |
| "learning_rate": 4.91152783045292e-06, | |
| "loss": 0.5715, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.17742908871454435, | |
| "grad_norm": 5.014878172443035, | |
| "learning_rate": 4.910599698797179e-06, | |
| "loss": 0.6227, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.17783142224904447, | |
| "grad_norm": 4.471540054012383, | |
| "learning_rate": 4.909666812773333e-06, | |
| "loss": 0.5145, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.17823375578354456, | |
| "grad_norm": 4.442414191534445, | |
| "learning_rate": 4.908729174221289e-06, | |
| "loss": 0.6688, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.17863608931804467, | |
| "grad_norm": 4.624231827213014, | |
| "learning_rate": 4.9077867849903325e-06, | |
| "loss": 0.6327, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.17903842285254476, | |
| "grad_norm": 4.743489707184984, | |
| "learning_rate": 4.906839646939113e-06, | |
| "loss": 0.7075, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.17944075638704485, | |
| "grad_norm": 4.200250704275894, | |
| "learning_rate": 4.905887761935649e-06, | |
| "loss": 0.5171, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.17984308992154496, | |
| "grad_norm": 4.0039914177118545, | |
| "learning_rate": 4.90493113185732e-06, | |
| "loss": 0.5334, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.18024542345604505, | |
| "grad_norm": 4.437438466205263, | |
| "learning_rate": 4.903969758590865e-06, | |
| "loss": 0.584, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.18064775699054517, | |
| "grad_norm": 4.668639275979892, | |
| "learning_rate": 4.9030036440323766e-06, | |
| "loss": 0.669, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.18105009052504525, | |
| "grad_norm": 4.262713251256841, | |
| "learning_rate": 4.902032790087301e-06, | |
| "loss": 0.518, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.18145242405954537, | |
| "grad_norm": 4.58562990715973, | |
| "learning_rate": 4.9010571986704295e-06, | |
| "loss": 0.5627, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.18185475759404546, | |
| "grad_norm": 4.2511574164879615, | |
| "learning_rate": 4.900076871705897e-06, | |
| "loss": 0.6189, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.18225709112854557, | |
| "grad_norm": 3.879096119034437, | |
| "learning_rate": 4.89909181112718e-06, | |
| "loss": 0.4688, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.18265942466304566, | |
| "grad_norm": 4.4698856213971645, | |
| "learning_rate": 4.898102018877088e-06, | |
| "loss": 0.6272, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.18306175819754578, | |
| "grad_norm": 4.25349966912043, | |
| "learning_rate": 4.897107496907767e-06, | |
| "loss": 0.4844, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.18346409173204586, | |
| "grad_norm": 5.180942020863308, | |
| "learning_rate": 4.896108247180688e-06, | |
| "loss": 0.7363, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.18386642526654598, | |
| "grad_norm": 4.464543366415725, | |
| "learning_rate": 4.895104271666647e-06, | |
| "loss": 0.507, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.18426875880104607, | |
| "grad_norm": 4.35656943725317, | |
| "learning_rate": 4.8940955723457604e-06, | |
| "loss": 0.5626, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.18467109233554616, | |
| "grad_norm": 4.46990880672421, | |
| "learning_rate": 4.893082151207464e-06, | |
| "loss": 0.5986, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.18507342587004627, | |
| "grad_norm": 4.315333956337568, | |
| "learning_rate": 4.8920640102505005e-06, | |
| "loss": 0.5399, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.18547575940454636, | |
| "grad_norm": 4.414262914713829, | |
| "learning_rate": 4.891041151482928e-06, | |
| "loss": 0.6866, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.18587809293904647, | |
| "grad_norm": 4.68767919716049, | |
| "learning_rate": 4.8900135769221045e-06, | |
| "loss": 0.6205, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.18628042647354656, | |
| "grad_norm": 4.55495109723687, | |
| "learning_rate": 4.888981288594692e-06, | |
| "loss": 0.6232, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.18668276000804668, | |
| "grad_norm": 5.072112114859536, | |
| "learning_rate": 4.887944288536648e-06, | |
| "loss": 0.6246, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.18708509354254677, | |
| "grad_norm": 4.598620261784645, | |
| "learning_rate": 4.886902578793221e-06, | |
| "loss": 0.6688, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.18748742707704688, | |
| "grad_norm": 4.156182800147685, | |
| "learning_rate": 4.885856161418953e-06, | |
| "loss": 0.6185, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.18788976061154697, | |
| "grad_norm": 5.123876789008787, | |
| "learning_rate": 4.8848050384776665e-06, | |
| "loss": 0.6485, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.18829209414604708, | |
| "grad_norm": 4.7877986741742555, | |
| "learning_rate": 4.883749212042466e-06, | |
| "loss": 0.6336, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.18869442768054717, | |
| "grad_norm": 4.170455407715718, | |
| "learning_rate": 4.882688684195732e-06, | |
| "loss": 0.5708, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.1890967612150473, | |
| "grad_norm": 4.394557070936156, | |
| "learning_rate": 4.881623457029121e-06, | |
| "loss": 0.6584, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.18949909474954738, | |
| "grad_norm": 4.4866621521500205, | |
| "learning_rate": 4.880553532643552e-06, | |
| "loss": 0.5816, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.18990142828404746, | |
| "grad_norm": 4.263089971509693, | |
| "learning_rate": 4.879478913149212e-06, | |
| "loss": 0.6563, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.19030376181854758, | |
| "grad_norm": 3.897184614846541, | |
| "learning_rate": 4.8783996006655485e-06, | |
| "loss": 0.5816, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.19070609535304767, | |
| "grad_norm": 4.759719975025391, | |
| "learning_rate": 4.877315597321263e-06, | |
| "loss": 0.7219, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.19110842888754778, | |
| "grad_norm": 4.561889671156211, | |
| "learning_rate": 4.876226905254309e-06, | |
| "loss": 0.6307, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.19151076242204787, | |
| "grad_norm": 4.622879218120913, | |
| "learning_rate": 4.875133526611888e-06, | |
| "loss": 0.5739, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.19191309595654799, | |
| "grad_norm": 4.183836330008031, | |
| "learning_rate": 4.874035463550445e-06, | |
| "loss": 0.4165, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.19231542949104807, | |
| "grad_norm": 4.431780943086593, | |
| "learning_rate": 4.872932718235663e-06, | |
| "loss": 0.5768, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.1927177630255482, | |
| "grad_norm": 4.700772397419956, | |
| "learning_rate": 4.87182529284246e-06, | |
| "loss": 0.6635, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.19312009656004828, | |
| "grad_norm": 4.0384190687435915, | |
| "learning_rate": 4.870713189554985e-06, | |
| "loss": 0.4994, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.1935224300945484, | |
| "grad_norm": 4.16066989214431, | |
| "learning_rate": 4.869596410566614e-06, | |
| "loss": 0.523, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.19392476362904848, | |
| "grad_norm": 3.7758221004395565, | |
| "learning_rate": 4.8684749580799405e-06, | |
| "loss": 0.5106, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.1943270971635486, | |
| "grad_norm": 4.792256321316127, | |
| "learning_rate": 4.867348834306781e-06, | |
| "loss": 0.5955, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.19472943069804868, | |
| "grad_norm": 4.408528934091589, | |
| "learning_rate": 4.866218041468161e-06, | |
| "loss": 0.6032, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.19513176423254877, | |
| "grad_norm": 4.425894822386934, | |
| "learning_rate": 4.865082581794317e-06, | |
| "loss": 0.5298, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.1955340977670489, | |
| "grad_norm": 4.356368057153947, | |
| "learning_rate": 4.863942457524689e-06, | |
| "loss": 0.5381, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.19593643130154897, | |
| "grad_norm": 3.990737447917161, | |
| "learning_rate": 4.862797670907915e-06, | |
| "loss": 0.5049, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.1963387648360491, | |
| "grad_norm": 4.229237889406895, | |
| "learning_rate": 4.8616482242018325e-06, | |
| "loss": 0.5467, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.19674109837054918, | |
| "grad_norm": 4.126506367371427, | |
| "learning_rate": 4.860494119673466e-06, | |
| "loss": 0.5455, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.1971434319050493, | |
| "grad_norm": 5.231977983505604, | |
| "learning_rate": 4.859335359599031e-06, | |
| "loss": 0.7528, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.19754576543954938, | |
| "grad_norm": 5.1291616835145115, | |
| "learning_rate": 4.85817194626392e-06, | |
| "loss": 0.679, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.1979480989740495, | |
| "grad_norm": 3.737681461647685, | |
| "learning_rate": 4.857003881962705e-06, | |
| "loss": 0.4926, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.19835043250854958, | |
| "grad_norm": 4.703925829656442, | |
| "learning_rate": 4.855831168999132e-06, | |
| "loss": 0.6439, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.1987527660430497, | |
| "grad_norm": 4.627856862526262, | |
| "learning_rate": 4.854653809686115e-06, | |
| "loss": 0.6698, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.1991550995775498, | |
| "grad_norm": 4.364999176797817, | |
| "learning_rate": 4.853471806345732e-06, | |
| "loss": 0.6015, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.19955743311204988, | |
| "grad_norm": 4.602684651491329, | |
| "learning_rate": 4.852285161309218e-06, | |
| "loss": 0.5809, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.19995976664655, | |
| "grad_norm": 4.325677574896615, | |
| "learning_rate": 4.851093876916967e-06, | |
| "loss": 0.526, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.20036210018105008, | |
| "grad_norm": 4.81776828160967, | |
| "learning_rate": 4.849897955518518e-06, | |
| "loss": 0.6654, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.2007644337155502, | |
| "grad_norm": 5.49681992539213, | |
| "learning_rate": 4.848697399472561e-06, | |
| "loss": 0.8468, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.20116676725005028, | |
| "grad_norm": 4.782602085058219, | |
| "learning_rate": 4.8474922111469225e-06, | |
| "loss": 0.6339, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.2015691007845504, | |
| "grad_norm": 4.705465090280715, | |
| "learning_rate": 4.846282392918566e-06, | |
| "loss": 0.5111, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.20197143431905049, | |
| "grad_norm": 4.044280178788859, | |
| "learning_rate": 4.845067947173589e-06, | |
| "loss": 0.4453, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.2023737678535506, | |
| "grad_norm": 5.299453770363048, | |
| "learning_rate": 4.843848876307211e-06, | |
| "loss": 0.7287, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.2027761013880507, | |
| "grad_norm": 3.9060416143674, | |
| "learning_rate": 4.842625182723779e-06, | |
| "loss": 0.4903, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.2031784349225508, | |
| "grad_norm": 4.320966869452432, | |
| "learning_rate": 4.841396868836753e-06, | |
| "loss": 0.5312, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.2035807684570509, | |
| "grad_norm": 4.059590568863906, | |
| "learning_rate": 4.840163937068707e-06, | |
| "loss": 0.5953, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.203983101991551, | |
| "grad_norm": 4.544560777009811, | |
| "learning_rate": 4.838926389851324e-06, | |
| "loss": 0.5274, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.2043854355260511, | |
| "grad_norm": 4.643239965625325, | |
| "learning_rate": 4.837684229625389e-06, | |
| "loss": 0.6284, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.20478776906055118, | |
| "grad_norm": 3.848260216211789, | |
| "learning_rate": 4.836437458840783e-06, | |
| "loss": 0.5052, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.2051901025950513, | |
| "grad_norm": 4.65542258301039, | |
| "learning_rate": 4.835186079956483e-06, | |
| "loss": 0.5641, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.2055924361295514, | |
| "grad_norm": 3.7048624001601738, | |
| "learning_rate": 4.8339300954405545e-06, | |
| "loss": 0.5507, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.2059947696640515, | |
| "grad_norm": 4.721724963544143, | |
| "learning_rate": 4.832669507770144e-06, | |
| "loss": 0.6945, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.2063971031985516, | |
| "grad_norm": 3.9937965024761892, | |
| "learning_rate": 4.83140431943148e-06, | |
| "loss": 0.5444, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.2067994367330517, | |
| "grad_norm": 4.150283320138307, | |
| "learning_rate": 4.830134532919863e-06, | |
| "loss": 0.5835, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.2072017702675518, | |
| "grad_norm": 5.2365671694488904, | |
| "learning_rate": 4.828860150739662e-06, | |
| "loss": 0.697, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.2076041038020519, | |
| "grad_norm": 3.98085660862504, | |
| "learning_rate": 4.827581175404311e-06, | |
| "loss": 0.4872, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.208006437336552, | |
| "grad_norm": 4.574435055815776, | |
| "learning_rate": 4.8262976094363016e-06, | |
| "loss": 0.5925, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.2084087708710521, | |
| "grad_norm": 4.459834553682444, | |
| "learning_rate": 4.825009455367181e-06, | |
| "loss": 0.6426, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.2088111044055522, | |
| "grad_norm": 3.8557681901771037, | |
| "learning_rate": 4.823716715737544e-06, | |
| "loss": 0.5719, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.20921343794005232, | |
| "grad_norm": 4.776729631302629, | |
| "learning_rate": 4.82241939309703e-06, | |
| "loss": 0.6118, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2096157714745524, | |
| "grad_norm": 4.701717061474551, | |
| "learning_rate": 4.821117490004319e-06, | |
| "loss": 0.4766, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.2100181050090525, | |
| "grad_norm": 4.486472816066567, | |
| "learning_rate": 4.819811009027122e-06, | |
| "loss": 0.6134, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.2104204385435526, | |
| "grad_norm": 4.784573013198364, | |
| "learning_rate": 4.818499952742179e-06, | |
| "loss": 0.5982, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.2108227720780527, | |
| "grad_norm": 4.953599282635229, | |
| "learning_rate": 4.817184323735258e-06, | |
| "loss": 0.5732, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.2112251056125528, | |
| "grad_norm": 4.808483380881421, | |
| "learning_rate": 4.815864124601139e-06, | |
| "loss": 0.6771, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.2116274391470529, | |
| "grad_norm": 4.024625165836511, | |
| "learning_rate": 4.814539357943622e-06, | |
| "loss": 0.6355, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.212029772681553, | |
| "grad_norm": 3.943315026550887, | |
| "learning_rate": 4.813210026375513e-06, | |
| "loss": 0.521, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.2124321062160531, | |
| "grad_norm": 3.463894710975418, | |
| "learning_rate": 4.811876132518618e-06, | |
| "loss": 0.4542, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.21283443975055322, | |
| "grad_norm": 4.513349087640872, | |
| "learning_rate": 4.810537679003746e-06, | |
| "loss": 0.6093, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.2132367732850533, | |
| "grad_norm": 4.649205790356446, | |
| "learning_rate": 4.8091946684706956e-06, | |
| "loss": 0.6726, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.21363910681955342, | |
| "grad_norm": 4.2037442337203315, | |
| "learning_rate": 4.807847103568255e-06, | |
| "loss": 0.6265, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.2140414403540535, | |
| "grad_norm": 4.893988522032171, | |
| "learning_rate": 4.806494986954195e-06, | |
| "loss": 0.7373, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.21444377388855362, | |
| "grad_norm": 4.455715486715626, | |
| "learning_rate": 4.805138321295262e-06, | |
| "loss": 0.5457, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.2148461074230537, | |
| "grad_norm": 4.649691607815862, | |
| "learning_rate": 4.803777109267174e-06, | |
| "loss": 0.5872, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.2152484409575538, | |
| "grad_norm": 4.606972921739361, | |
| "learning_rate": 4.802411353554619e-06, | |
| "loss": 0.6556, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.21565077449205391, | |
| "grad_norm": 4.262679120822924, | |
| "learning_rate": 4.801041056851244e-06, | |
| "loss": 0.5814, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.216053108026554, | |
| "grad_norm": 4.734408703763602, | |
| "learning_rate": 4.7996662218596505e-06, | |
| "loss": 0.5975, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.21645544156105412, | |
| "grad_norm": 4.0966775674750515, | |
| "learning_rate": 4.798286851291395e-06, | |
| "loss": 0.5373, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.2168577750955542, | |
| "grad_norm": 4.794723705583196, | |
| "learning_rate": 4.796902947866976e-06, | |
| "loss": 0.6411, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.21726010863005432, | |
| "grad_norm": 5.177706885118282, | |
| "learning_rate": 4.795514514315833e-06, | |
| "loss": 0.7129, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.2176624421645544, | |
| "grad_norm": 4.235334038988407, | |
| "learning_rate": 4.794121553376341e-06, | |
| "loss": 0.6368, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.21806477569905452, | |
| "grad_norm": 4.250039615427904, | |
| "learning_rate": 4.792724067795802e-06, | |
| "loss": 0.5897, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.2184671092335546, | |
| "grad_norm": 4.248095551159398, | |
| "learning_rate": 4.7913220603304455e-06, | |
| "loss": 0.6119, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.21886944276805473, | |
| "grad_norm": 4.7784528939759126, | |
| "learning_rate": 4.789915533745415e-06, | |
| "loss": 0.6428, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.21927177630255482, | |
| "grad_norm": 3.7433210513797857, | |
| "learning_rate": 4.788504490814771e-06, | |
| "loss": 0.4616, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.21967410983705493, | |
| "grad_norm": 4.477510756273104, | |
| "learning_rate": 4.78708893432148e-06, | |
| "loss": 0.5564, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.22007644337155502, | |
| "grad_norm": 4.159189423897508, | |
| "learning_rate": 4.785668867057408e-06, | |
| "loss": 0.5005, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.2204787769060551, | |
| "grad_norm": 4.801923291900065, | |
| "learning_rate": 4.784244291823321e-06, | |
| "loss": 0.6703, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.22088111044055522, | |
| "grad_norm": 4.9705885588356065, | |
| "learning_rate": 4.782815211428875e-06, | |
| "loss": 0.6892, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.2212834439750553, | |
| "grad_norm": 4.689425225044057, | |
| "learning_rate": 4.7813816286926116e-06, | |
| "loss": 0.6438, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.22168577750955543, | |
| "grad_norm": 3.9964370496914836, | |
| "learning_rate": 4.7799435464419496e-06, | |
| "loss": 0.5283, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.22208811104405551, | |
| "grad_norm": 3.9063906383285523, | |
| "learning_rate": 4.778500967513186e-06, | |
| "loss": 0.5477, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.22249044457855563, | |
| "grad_norm": 4.972674347366186, | |
| "learning_rate": 4.777053894751484e-06, | |
| "loss": 0.6762, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.22289277811305572, | |
| "grad_norm": 4.605207302352324, | |
| "learning_rate": 4.77560233101087e-06, | |
| "loss": 0.5823, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.22329511164755583, | |
| "grad_norm": 5.023738846112498, | |
| "learning_rate": 4.774146279154231e-06, | |
| "loss": 0.645, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.22369744518205592, | |
| "grad_norm": 4.21478405630217, | |
| "learning_rate": 4.772685742053299e-06, | |
| "loss": 0.6361, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.22409977871655604, | |
| "grad_norm": 4.443333059344436, | |
| "learning_rate": 4.7712207225886605e-06, | |
| "loss": 0.5808, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.22450211225105612, | |
| "grad_norm": 4.136335273179504, | |
| "learning_rate": 4.7697512236497355e-06, | |
| "loss": 0.4543, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.22490444578555624, | |
| "grad_norm": 5.228702785570883, | |
| "learning_rate": 4.7682772481347835e-06, | |
| "loss": 0.6677, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.22530677932005633, | |
| "grad_norm": 4.607023738968648, | |
| "learning_rate": 4.76679879895089e-06, | |
| "loss": 0.6972, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.22570911285455642, | |
| "grad_norm": 3.9623979182055646, | |
| "learning_rate": 4.7653158790139655e-06, | |
| "loss": 0.562, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.22611144638905653, | |
| "grad_norm": 4.785540664618103, | |
| "learning_rate": 4.763828491248737e-06, | |
| "loss": 0.6667, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.22651377992355662, | |
| "grad_norm": 4.626975611158776, | |
| "learning_rate": 4.762336638588745e-06, | |
| "loss": 0.5222, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.22691611345805673, | |
| "grad_norm": 4.148716543735861, | |
| "learning_rate": 4.760840323976333e-06, | |
| "loss": 0.5792, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.22731844699255682, | |
| "grad_norm": 3.7576481851339785, | |
| "learning_rate": 4.759339550362647e-06, | |
| "loss": 0.4171, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.22772078052705694, | |
| "grad_norm": 4.056873981914918, | |
| "learning_rate": 4.757834320707629e-06, | |
| "loss": 0.5823, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.22812311406155703, | |
| "grad_norm": 4.485956120607346, | |
| "learning_rate": 4.7563246379800036e-06, | |
| "loss": 0.6038, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.22852544759605714, | |
| "grad_norm": 4.095512060377993, | |
| "learning_rate": 4.754810505157285e-06, | |
| "loss": 0.5807, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.22892778113055723, | |
| "grad_norm": 4.316692633895088, | |
| "learning_rate": 4.753291925225761e-06, | |
| "loss": 0.5689, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.22933011466505734, | |
| "grad_norm": 4.0964431049127095, | |
| "learning_rate": 4.751768901180488e-06, | |
| "loss": 0.4724, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.22973244819955743, | |
| "grad_norm": 3.8505086953071683, | |
| "learning_rate": 4.750241436025292e-06, | |
| "loss": 0.5599, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.23013478173405755, | |
| "grad_norm": 4.97127913813276, | |
| "learning_rate": 4.748709532772756e-06, | |
| "loss": 0.7189, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.23053711526855764, | |
| "grad_norm": 4.376792824386442, | |
| "learning_rate": 4.7471731944442154e-06, | |
| "loss": 0.6149, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.23093944880305772, | |
| "grad_norm": 4.633754764886852, | |
| "learning_rate": 4.745632424069755e-06, | |
| "loss": 0.6118, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.23134178233755784, | |
| "grad_norm": 4.682951429282118, | |
| "learning_rate": 4.744087224688197e-06, | |
| "loss": 0.6089, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.23174411587205793, | |
| "grad_norm": 4.504821455509545, | |
| "learning_rate": 4.742537599347101e-06, | |
| "loss": 0.5747, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.23214644940655804, | |
| "grad_norm": 4.922801626757808, | |
| "learning_rate": 4.740983551102759e-06, | |
| "loss": 0.7267, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.23254878294105813, | |
| "grad_norm": 4.128104128218885, | |
| "learning_rate": 4.73942508302018e-06, | |
| "loss": 0.5344, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.23295111647555825, | |
| "grad_norm": 4.502862129743137, | |
| "learning_rate": 4.7378621981730966e-06, | |
| "loss": 0.4465, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.23335345001005833, | |
| "grad_norm": 4.476573377559871, | |
| "learning_rate": 4.736294899643946e-06, | |
| "loss": 0.5327, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.23375578354455845, | |
| "grad_norm": 4.525722183672701, | |
| "learning_rate": 4.734723190523875e-06, | |
| "loss": 0.6182, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.23415811707905854, | |
| "grad_norm": 4.4174724143850455, | |
| "learning_rate": 4.7331470739127284e-06, | |
| "loss": 0.582, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.23456045061355865, | |
| "grad_norm": 4.087588153474757, | |
| "learning_rate": 4.731566552919042e-06, | |
| "loss": 0.5626, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.23496278414805874, | |
| "grad_norm": 4.793168855698109, | |
| "learning_rate": 4.7299816306600395e-06, | |
| "loss": 0.712, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.23536511768255883, | |
| "grad_norm": 4.088862969423477, | |
| "learning_rate": 4.728392310261628e-06, | |
| "loss": 0.5867, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.23576745121705894, | |
| "grad_norm": 4.31442875545057, | |
| "learning_rate": 4.726798594858382e-06, | |
| "loss": 0.5255, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.23616978475155903, | |
| "grad_norm": 4.2694537336500495, | |
| "learning_rate": 4.7252004875935506e-06, | |
| "loss": 0.6822, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.23657211828605915, | |
| "grad_norm": 4.179846752454285, | |
| "learning_rate": 4.723597991619043e-06, | |
| "loss": 0.5433, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.23697445182055923, | |
| "grad_norm": 4.1764050880208, | |
| "learning_rate": 4.721991110095422e-06, | |
| "loss": 0.586, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.23737678535505935, | |
| "grad_norm": 3.699033328030597, | |
| "learning_rate": 4.720379846191903e-06, | |
| "loss": 0.4942, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.23777911888955944, | |
| "grad_norm": 4.798015125682991, | |
| "learning_rate": 4.718764203086342e-06, | |
| "loss": 0.7269, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.23818145242405955, | |
| "grad_norm": 4.974938507339555, | |
| "learning_rate": 4.717144183965233e-06, | |
| "loss": 0.6753, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.23858378595855964, | |
| "grad_norm": 5.08563513422606, | |
| "learning_rate": 4.715519792023703e-06, | |
| "loss": 0.6613, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.23898611949305976, | |
| "grad_norm": 4.6691045707182655, | |
| "learning_rate": 4.7138910304655e-06, | |
| "loss": 0.5635, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.23938845302755984, | |
| "grad_norm": 4.366016787794075, | |
| "learning_rate": 4.712257902502992e-06, | |
| "loss": 0.6603, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.23979078656205996, | |
| "grad_norm": 4.98238528037948, | |
| "learning_rate": 4.710620411357157e-06, | |
| "loss": 0.6241, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.24019312009656005, | |
| "grad_norm": 4.023463566077988, | |
| "learning_rate": 4.7089785602575805e-06, | |
| "loss": 0.5202, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.24059545363106014, | |
| "grad_norm": 4.040098710840788, | |
| "learning_rate": 4.707332352442446e-06, | |
| "loss": 0.5843, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.24099778716556025, | |
| "grad_norm": 4.151014624312927, | |
| "learning_rate": 4.705681791158531e-06, | |
| "loss": 0.6206, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.24140012070006034, | |
| "grad_norm": 4.0875213661566665, | |
| "learning_rate": 4.704026879661196e-06, | |
| "loss": 0.5614, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.24180245423456045, | |
| "grad_norm": 4.553877858262055, | |
| "learning_rate": 4.7023676212143834e-06, | |
| "loss": 0.6649, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.24220478776906054, | |
| "grad_norm": 4.674650194014434, | |
| "learning_rate": 4.70070401909061e-06, | |
| "loss": 0.606, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.24260712130356066, | |
| "grad_norm": 4.63353917939836, | |
| "learning_rate": 4.699036076570959e-06, | |
| "loss": 0.6668, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.24300945483806075, | |
| "grad_norm": 4.723870743698028, | |
| "learning_rate": 4.697363796945072e-06, | |
| "loss": 0.6728, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.24341178837256086, | |
| "grad_norm": 4.386624516452244, | |
| "learning_rate": 4.6956871835111475e-06, | |
| "loss": 0.5139, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.24381412190706095, | |
| "grad_norm": 4.244611479893601, | |
| "learning_rate": 4.694006239575929e-06, | |
| "loss": 0.609, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.24421645544156106, | |
| "grad_norm": 4.072589096296677, | |
| "learning_rate": 4.692320968454702e-06, | |
| "loss": 0.5657, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.24461878897606115, | |
| "grad_norm": 4.88910796894103, | |
| "learning_rate": 4.690631373471287e-06, | |
| "loss": 0.701, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.24502112251056127, | |
| "grad_norm": 4.686738823395271, | |
| "learning_rate": 4.6889374579580315e-06, | |
| "loss": 0.637, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.24542345604506136, | |
| "grad_norm": 4.905698840133413, | |
| "learning_rate": 4.687239225255805e-06, | |
| "loss": 0.6003, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.24582578957956144, | |
| "grad_norm": 4.338740241210239, | |
| "learning_rate": 4.68553667871399e-06, | |
| "loss": 0.5906, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.24622812311406156, | |
| "grad_norm": 4.119688719772878, | |
| "learning_rate": 4.68382982169048e-06, | |
| "loss": 0.4954, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.24663045664856165, | |
| "grad_norm": 3.96691281411716, | |
| "learning_rate": 4.6821186575516665e-06, | |
| "loss": 0.6126, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.24703279018306176, | |
| "grad_norm": 4.760926273551105, | |
| "learning_rate": 4.680403189672439e-06, | |
| "loss": 0.6613, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.24743512371756185, | |
| "grad_norm": 4.4384316824035075, | |
| "learning_rate": 4.678683421436173e-06, | |
| "loss": 0.5687, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.24783745725206197, | |
| "grad_norm": 3.931664070509481, | |
| "learning_rate": 4.676959356234726e-06, | |
| "loss": 0.5857, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.24823979078656205, | |
| "grad_norm": 5.119336252733733, | |
| "learning_rate": 4.6752309974684315e-06, | |
| "loss": 0.6312, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.24864212432106217, | |
| "grad_norm": 4.229793646214086, | |
| "learning_rate": 4.6734983485460895e-06, | |
| "loss": 0.5093, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.24904445785556226, | |
| "grad_norm": 4.9794376894703944, | |
| "learning_rate": 4.671761412884962e-06, | |
| "loss": 0.7092, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.24944679139006237, | |
| "grad_norm": 4.36204998657697, | |
| "learning_rate": 4.670020193910766e-06, | |
| "loss": 0.559, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.24984912492456246, | |
| "grad_norm": 4.297208174823204, | |
| "learning_rate": 4.668274695057666e-06, | |
| "loss": 0.4965, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.25025145845906255, | |
| "grad_norm": 4.235510447453734, | |
| "learning_rate": 4.666524919768267e-06, | |
| "loss": 0.5434, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.25065379199356264, | |
| "grad_norm": 4.297045599708538, | |
| "learning_rate": 4.66477087149361e-06, | |
| "loss": 0.5559, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.2510561255280628, | |
| "grad_norm": 5.638228122643803, | |
| "learning_rate": 4.663012553693161e-06, | |
| "loss": 0.6393, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.25145845906256287, | |
| "grad_norm": 4.307369262356312, | |
| "learning_rate": 4.661249969834809e-06, | |
| "loss": 0.5516, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.25186079259706295, | |
| "grad_norm": 4.348008968073508, | |
| "learning_rate": 4.659483123394855e-06, | |
| "loss": 0.5858, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.25226312613156304, | |
| "grad_norm": 4.245775793874246, | |
| "learning_rate": 4.657712017858011e-06, | |
| "loss": 0.5985, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.2526654596660632, | |
| "grad_norm": 4.27241102613026, | |
| "learning_rate": 4.6559366567173824e-06, | |
| "loss": 0.5247, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.2530677932005633, | |
| "grad_norm": 3.9143192748019384, | |
| "learning_rate": 4.6541570434744735e-06, | |
| "loss": 0.4936, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.25347012673506336, | |
| "grad_norm": 4.550429412487192, | |
| "learning_rate": 4.6523731816391725e-06, | |
| "loss": 0.7499, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.25387246026956345, | |
| "grad_norm": 4.202697728155226, | |
| "learning_rate": 4.650585074729747e-06, | |
| "loss": 0.5184, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.2542747938040636, | |
| "grad_norm": 4.083559252407811, | |
| "learning_rate": 4.648792726272838e-06, | |
| "loss": 0.5242, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.2546771273385637, | |
| "grad_norm": 3.9861255665329267, | |
| "learning_rate": 4.646996139803452e-06, | |
| "loss": 0.5269, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.25507946087306377, | |
| "grad_norm": 4.559490860669659, | |
| "learning_rate": 4.645195318864951e-06, | |
| "loss": 0.6372, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.25548179440756386, | |
| "grad_norm": 4.394457869705605, | |
| "learning_rate": 4.643390267009054e-06, | |
| "loss": 0.5105, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.25588412794206394, | |
| "grad_norm": 5.018376167864149, | |
| "learning_rate": 4.641580987795821e-06, | |
| "loss": 0.6628, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.2562864614765641, | |
| "grad_norm": 3.7893948824739465, | |
| "learning_rate": 4.639767484793648e-06, | |
| "loss": 0.508, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.2566887950110642, | |
| "grad_norm": 4.596240270523842, | |
| "learning_rate": 4.637949761579266e-06, | |
| "loss": 0.6529, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.25709112854556426, | |
| "grad_norm": 5.140583964722019, | |
| "learning_rate": 4.636127821737726e-06, | |
| "loss": 0.6489, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.25749346208006435, | |
| "grad_norm": 3.8394758085160823, | |
| "learning_rate": 4.634301668862397e-06, | |
| "loss": 0.4736, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.2578957956145645, | |
| "grad_norm": 4.38356346084736, | |
| "learning_rate": 4.632471306554955e-06, | |
| "loss": 0.5353, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.2582981291490646, | |
| "grad_norm": 3.8889360472386185, | |
| "learning_rate": 4.630636738425381e-06, | |
| "loss": 0.5511, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.25870046268356467, | |
| "grad_norm": 4.817925477512709, | |
| "learning_rate": 4.62879796809195e-06, | |
| "loss": 0.6963, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.25910279621806476, | |
| "grad_norm": 4.742191877421797, | |
| "learning_rate": 4.626954999181224e-06, | |
| "loss": 0.7752, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.2595051297525649, | |
| "grad_norm": 4.245951623762571, | |
| "learning_rate": 4.625107835328048e-06, | |
| "loss": 0.5754, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.259907463287065, | |
| "grad_norm": 4.836575633325676, | |
| "learning_rate": 4.623256480175537e-06, | |
| "loss": 0.6639, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.2603097968215651, | |
| "grad_norm": 3.806176284035789, | |
| "learning_rate": 4.621400937375075e-06, | |
| "loss": 0.5292, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.26071213035606516, | |
| "grad_norm": 4.025115976057339, | |
| "learning_rate": 4.619541210586307e-06, | |
| "loss": 0.4815, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.26111446389056525, | |
| "grad_norm": 4.1347392050793825, | |
| "learning_rate": 4.617677303477124e-06, | |
| "loss": 0.5681, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.2615167974250654, | |
| "grad_norm": 4.371554358928095, | |
| "learning_rate": 4.615809219723667e-06, | |
| "loss": 0.592, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.2619191309595655, | |
| "grad_norm": 4.976927834726334, | |
| "learning_rate": 4.613936963010314e-06, | |
| "loss": 0.7898, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.26232146449406557, | |
| "grad_norm": 4.80195477309382, | |
| "learning_rate": 4.612060537029671e-06, | |
| "loss": 0.6565, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.26272379802856566, | |
| "grad_norm": 4.2203256747490805, | |
| "learning_rate": 4.610179945482568e-06, | |
| "loss": 0.5597, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.2631261315630658, | |
| "grad_norm": 4.3119037789418035, | |
| "learning_rate": 4.608295192078051e-06, | |
| "loss": 0.5773, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.2635284650975659, | |
| "grad_norm": 3.938767904065124, | |
| "learning_rate": 4.606406280533373e-06, | |
| "loss": 0.4691, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.263930798632066, | |
| "grad_norm": 3.873313542196617, | |
| "learning_rate": 4.6045132145739914e-06, | |
| "loss": 0.5446, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.26433313216656606, | |
| "grad_norm": 4.720535373828625, | |
| "learning_rate": 4.602615997933552e-06, | |
| "loss": 0.6451, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.2647354657010662, | |
| "grad_norm": 5.319338434241136, | |
| "learning_rate": 4.600714634353893e-06, | |
| "loss": 0.6786, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.2651377992355663, | |
| "grad_norm": 3.986441538874652, | |
| "learning_rate": 4.598809127585026e-06, | |
| "loss": 0.5815, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.2655401327700664, | |
| "grad_norm": 4.190229942704305, | |
| "learning_rate": 4.596899481385137e-06, | |
| "loss": 0.5933, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.26594246630456647, | |
| "grad_norm": 4.211284711325467, | |
| "learning_rate": 4.5949856995205745e-06, | |
| "loss": 0.6062, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.26634479983906656, | |
| "grad_norm": 5.022372015405014, | |
| "learning_rate": 4.593067785765846e-06, | |
| "loss": 0.6632, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.2667471333735667, | |
| "grad_norm": 3.9365356585193148, | |
| "learning_rate": 4.5911457439036075e-06, | |
| "loss": 0.5324, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.2671494669080668, | |
| "grad_norm": 4.891303145874928, | |
| "learning_rate": 4.589219577724654e-06, | |
| "loss": 0.7504, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.2675518004425669, | |
| "grad_norm": 3.855940902006332, | |
| "learning_rate": 4.5872892910279185e-06, | |
| "loss": 0.5476, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.26795413397706697, | |
| "grad_norm": 4.37750980680622, | |
| "learning_rate": 4.58535488762046e-06, | |
| "loss": 0.5902, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.2683564675115671, | |
| "grad_norm": 4.450877429194335, | |
| "learning_rate": 4.583416371317454e-06, | |
| "loss": 0.6345, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.2687588010460672, | |
| "grad_norm": 4.162519879691083, | |
| "learning_rate": 4.581473745942191e-06, | |
| "loss": 0.444, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.2691611345805673, | |
| "grad_norm": 4.2236499578643025, | |
| "learning_rate": 4.579527015326065e-06, | |
| "loss": 0.492, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.2695634681150674, | |
| "grad_norm": 4.482147273633291, | |
| "learning_rate": 4.5775761833085645e-06, | |
| "loss": 0.5648, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.2699658016495675, | |
| "grad_norm": 4.760060597047131, | |
| "learning_rate": 4.57562125373727e-06, | |
| "loss": 0.6674, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.2703681351840676, | |
| "grad_norm": 4.362389446472747, | |
| "learning_rate": 4.573662230467844e-06, | |
| "loss": 0.577, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.2707704687185677, | |
| "grad_norm": 3.8159558241349987, | |
| "learning_rate": 4.5716991173640165e-06, | |
| "loss": 0.5185, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.2711728022530678, | |
| "grad_norm": 4.005051687655319, | |
| "learning_rate": 4.5697319182975944e-06, | |
| "loss": 0.5922, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.27157513578756787, | |
| "grad_norm": 4.667139467518304, | |
| "learning_rate": 4.567760637148432e-06, | |
| "loss": 0.6032, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.271977469322068, | |
| "grad_norm": 3.9606776473436405, | |
| "learning_rate": 4.5657852778044435e-06, | |
| "loss": 0.5174, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.2723798028565681, | |
| "grad_norm": 4.322780347285095, | |
| "learning_rate": 4.5638058441615815e-06, | |
| "loss": 0.6089, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.2727821363910682, | |
| "grad_norm": 4.433317399280547, | |
| "learning_rate": 4.561822340123836e-06, | |
| "loss": 0.6047, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.2731844699255683, | |
| "grad_norm": 4.552805703827102, | |
| "learning_rate": 4.559834769603224e-06, | |
| "loss": 0.6281, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.2735868034600684, | |
| "grad_norm": 5.128182512733822, | |
| "learning_rate": 4.557843136519784e-06, | |
| "loss": 0.6697, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.2739891369945685, | |
| "grad_norm": 4.063920951512369, | |
| "learning_rate": 4.555847444801565e-06, | |
| "loss": 0.5885, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.2743914705290686, | |
| "grad_norm": 4.908841427141714, | |
| "learning_rate": 4.5538476983846245e-06, | |
| "loss": 0.6771, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.2747938040635687, | |
| "grad_norm": 4.405724720337232, | |
| "learning_rate": 4.551843901213012e-06, | |
| "loss": 0.6479, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.2751961375980688, | |
| "grad_norm": 3.945129587052982, | |
| "learning_rate": 4.549836057238769e-06, | |
| "loss": 0.6039, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.2755984711325689, | |
| "grad_norm": 4.96376428039534, | |
| "learning_rate": 4.547824170421921e-06, | |
| "loss": 0.7191, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.276000804667069, | |
| "grad_norm": 4.378595779287015, | |
| "learning_rate": 4.54580824473046e-06, | |
| "loss": 0.5549, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.2764031382015691, | |
| "grad_norm": 4.262426880188478, | |
| "learning_rate": 4.54378828414035e-06, | |
| "loss": 0.5024, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.2768054717360692, | |
| "grad_norm": 4.426695285396216, | |
| "learning_rate": 4.541764292635512e-06, | |
| "loss": 0.7052, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.2772078052705693, | |
| "grad_norm": 4.473404410086587, | |
| "learning_rate": 4.5397362742078145e-06, | |
| "loss": 0.5557, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.2776101388050694, | |
| "grad_norm": 4.052844689460026, | |
| "learning_rate": 4.537704232857069e-06, | |
| "loss": 0.4541, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.2780124723395695, | |
| "grad_norm": 3.89050386632389, | |
| "learning_rate": 4.5356681725910255e-06, | |
| "loss": 0.549, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.2784148058740696, | |
| "grad_norm": 4.349538143222671, | |
| "learning_rate": 4.5336280974253535e-06, | |
| "loss": 0.6076, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.2788171394085697, | |
| "grad_norm": 4.361849160433631, | |
| "learning_rate": 4.5315840113836454e-06, | |
| "loss": 0.6524, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.2792194729430698, | |
| "grad_norm": 4.617234754063093, | |
| "learning_rate": 4.529535918497403e-06, | |
| "loss": 0.5616, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.2796218064775699, | |
| "grad_norm": 4.09023083083644, | |
| "learning_rate": 4.5274838228060326e-06, | |
| "loss": 0.5407, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.28002414001207, | |
| "grad_norm": 3.7157295921866718, | |
| "learning_rate": 4.525427728356832e-06, | |
| "loss": 0.5361, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.28042647354657013, | |
| "grad_norm": 4.457227461962157, | |
| "learning_rate": 4.523367639204985e-06, | |
| "loss": 0.5593, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.2808288070810702, | |
| "grad_norm": 4.348610370546784, | |
| "learning_rate": 4.52130355941356e-06, | |
| "loss": 0.5121, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.2812311406155703, | |
| "grad_norm": 3.9173059552199194, | |
| "learning_rate": 4.519235493053491e-06, | |
| "loss": 0.4927, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.2816334741500704, | |
| "grad_norm": 4.208431911956715, | |
| "learning_rate": 4.517163444203575e-06, | |
| "loss": 0.5504, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.2820358076845705, | |
| "grad_norm": 4.404532536673193, | |
| "learning_rate": 4.515087416950464e-06, | |
| "loss": 0.6057, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.2824381412190706, | |
| "grad_norm": 4.163697909062566, | |
| "learning_rate": 4.513007415388659e-06, | |
| "loss": 0.4682, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.2828404747535707, | |
| "grad_norm": 5.152927142247097, | |
| "learning_rate": 4.510923443620494e-06, | |
| "loss": 0.6601, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.2832428082880708, | |
| "grad_norm": 4.544361331270338, | |
| "learning_rate": 4.50883550575614e-06, | |
| "loss": 0.5968, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.2836451418225709, | |
| "grad_norm": 4.134394917706537, | |
| "learning_rate": 4.5067436059135836e-06, | |
| "loss": 0.6647, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.28404747535707103, | |
| "grad_norm": 4.734085611831448, | |
| "learning_rate": 4.504647748218633e-06, | |
| "loss": 0.601, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.2844498088915711, | |
| "grad_norm": 4.681267518422373, | |
| "learning_rate": 4.502547936804894e-06, | |
| "loss": 0.5539, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.2848521424260712, | |
| "grad_norm": 4.325088947732379, | |
| "learning_rate": 4.500444175813776e-06, | |
| "loss": 0.5872, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.2852544759605713, | |
| "grad_norm": 3.850534498836246, | |
| "learning_rate": 4.4983364693944775e-06, | |
| "loss": 0.5061, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.28565680949507144, | |
| "grad_norm": 4.373584617561936, | |
| "learning_rate": 4.496224821703977e-06, | |
| "loss": 0.6693, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.2860591430295715, | |
| "grad_norm": 4.377477636040013, | |
| "learning_rate": 4.494109236907026e-06, | |
| "loss": 0.5966, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.2864614765640716, | |
| "grad_norm": 4.271381874115667, | |
| "learning_rate": 4.491989719176142e-06, | |
| "loss": 0.6221, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.2868638100985717, | |
| "grad_norm": 3.7259627257148202, | |
| "learning_rate": 4.489866272691599e-06, | |
| "loss": 0.4313, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.2872661436330718, | |
| "grad_norm": 4.0336320558123155, | |
| "learning_rate": 4.48773890164142e-06, | |
| "loss": 0.5305, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.28766847716757193, | |
| "grad_norm": 4.585992205034134, | |
| "learning_rate": 4.485607610221367e-06, | |
| "loss": 0.7636, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.288070810702072, | |
| "grad_norm": 4.67820441036977, | |
| "learning_rate": 4.4834724026349376e-06, | |
| "loss": 0.5866, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.2884731442365721, | |
| "grad_norm": 5.012556667522281, | |
| "learning_rate": 4.4813332830933484e-06, | |
| "loss": 0.6498, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.2888754777710722, | |
| "grad_norm": 4.19440277086636, | |
| "learning_rate": 4.4791902558155345e-06, | |
| "loss": 0.5434, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.28927781130557234, | |
| "grad_norm": 4.330716534353544, | |
| "learning_rate": 4.477043325028137e-06, | |
| "loss": 0.5507, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.28968014484007243, | |
| "grad_norm": 4.097976140286037, | |
| "learning_rate": 4.474892494965495e-06, | |
| "loss": 0.4591, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.2900824783745725, | |
| "grad_norm": 4.132010732456553, | |
| "learning_rate": 4.472737769869642e-06, | |
| "loss": 0.5217, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.2904848119090726, | |
| "grad_norm": 3.7815121295908343, | |
| "learning_rate": 4.470579153990288e-06, | |
| "loss": 0.4932, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.29088714544357275, | |
| "grad_norm": 3.724905186650606, | |
| "learning_rate": 4.468416651584822e-06, | |
| "loss": 0.5448, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.29128947897807284, | |
| "grad_norm": 3.82443773314746, | |
| "learning_rate": 4.4662502669182935e-06, | |
| "loss": 0.5298, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.2916918125125729, | |
| "grad_norm": 4.343371987708737, | |
| "learning_rate": 4.464080004263411e-06, | |
| "loss": 0.606, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.292094146047073, | |
| "grad_norm": 4.009570342579401, | |
| "learning_rate": 4.461905867900535e-06, | |
| "loss": 0.5916, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.2924964795815731, | |
| "grad_norm": 4.148496022814315, | |
| "learning_rate": 4.459727862117658e-06, | |
| "loss": 0.5155, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.29289881311607324, | |
| "grad_norm": 4.568599931304069, | |
| "learning_rate": 4.457545991210412e-06, | |
| "loss": 0.555, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.29330114665057333, | |
| "grad_norm": 4.026645287125634, | |
| "learning_rate": 4.455360259482047e-06, | |
| "loss": 0.5265, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.2937034801850734, | |
| "grad_norm": 4.4466778657917265, | |
| "learning_rate": 4.4531706712434305e-06, | |
| "loss": 0.6318, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.2941058137195735, | |
| "grad_norm": 4.444852619715758, | |
| "learning_rate": 4.450977230813035e-06, | |
| "loss": 0.671, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.29450814725407365, | |
| "grad_norm": 5.136550532204147, | |
| "learning_rate": 4.44877994251693e-06, | |
| "loss": 0.611, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.29491048078857374, | |
| "grad_norm": 4.472272090985854, | |
| "learning_rate": 4.446578810688774e-06, | |
| "loss": 0.595, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.2953128143230738, | |
| "grad_norm": 4.755288406312762, | |
| "learning_rate": 4.444373839669808e-06, | |
| "loss": 0.801, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.2957151478575739, | |
| "grad_norm": 4.964978523542175, | |
| "learning_rate": 4.442165033808843e-06, | |
| "loss": 0.6712, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.29611748139207406, | |
| "grad_norm": 4.590633696444609, | |
| "learning_rate": 4.439952397462254e-06, | |
| "loss": 0.6214, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.29651981492657414, | |
| "grad_norm": 4.05753322012328, | |
| "learning_rate": 4.43773593499397e-06, | |
| "loss": 0.4477, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.29692214846107423, | |
| "grad_norm": 4.354711360779599, | |
| "learning_rate": 4.435515650775468e-06, | |
| "loss": 0.5618, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.2973244819955743, | |
| "grad_norm": 4.543128550216277, | |
| "learning_rate": 4.433291549185761e-06, | |
| "loss": 0.4807, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.2977268155300744, | |
| "grad_norm": 3.9444140759236737, | |
| "learning_rate": 4.431063634611392e-06, | |
| "loss": 0.5393, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.29812914906457455, | |
| "grad_norm": 4.516456335270593, | |
| "learning_rate": 4.428831911446422e-06, | |
| "loss": 0.5839, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.29853148259907464, | |
| "grad_norm": 4.4607761036583815, | |
| "learning_rate": 4.426596384092426e-06, | |
| "loss": 0.6538, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.2989338161335747, | |
| "grad_norm": 6.196021389574422, | |
| "learning_rate": 4.424357056958483e-06, | |
| "loss": 0.6484, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.2993361496680748, | |
| "grad_norm": 4.251042887568661, | |
| "learning_rate": 4.422113934461161e-06, | |
| "loss": 0.5523, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.29973848320257496, | |
| "grad_norm": 4.697763472346534, | |
| "learning_rate": 4.4198670210245186e-06, | |
| "loss": 0.6053, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.30014081673707504, | |
| "grad_norm": 4.665389980976664, | |
| "learning_rate": 4.41761632108009e-06, | |
| "loss": 0.6618, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.30054315027157513, | |
| "grad_norm": 4.2980125151698525, | |
| "learning_rate": 4.415361839066874e-06, | |
| "loss": 0.4863, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.3009454838060752, | |
| "grad_norm": 4.25422933800907, | |
| "learning_rate": 4.413103579431335e-06, | |
| "loss": 0.6285, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.30134781734057536, | |
| "grad_norm": 4.408647829220997, | |
| "learning_rate": 4.410841546627383e-06, | |
| "loss": 0.6214, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.30175015087507545, | |
| "grad_norm": 3.5964308822983626, | |
| "learning_rate": 4.408575745116371e-06, | |
| "loss": 0.4709, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.30215248440957554, | |
| "grad_norm": 4.177190728366978, | |
| "learning_rate": 4.406306179367086e-06, | |
| "loss": 0.6584, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.3025548179440756, | |
| "grad_norm": 3.9436659867851, | |
| "learning_rate": 4.404032853855736e-06, | |
| "loss": 0.5804, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.3029571514785757, | |
| "grad_norm": 4.5139405502647465, | |
| "learning_rate": 4.40175577306595e-06, | |
| "loss": 0.6131, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.30335948501307586, | |
| "grad_norm": 4.656972033870911, | |
| "learning_rate": 4.399474941488756e-06, | |
| "loss": 0.5378, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.30376181854757595, | |
| "grad_norm": 4.2754391925078785, | |
| "learning_rate": 4.397190363622588e-06, | |
| "loss": 0.5117, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.30416415208207603, | |
| "grad_norm": 4.057091286795055, | |
| "learning_rate": 4.3949020439732594e-06, | |
| "loss": 0.4969, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.3045664856165761, | |
| "grad_norm": 3.914038182134245, | |
| "learning_rate": 4.392609987053972e-06, | |
| "loss": 0.4484, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.30496881915107626, | |
| "grad_norm": 4.364733148673897, | |
| "learning_rate": 4.390314197385292e-06, | |
| "loss": 0.662, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.30537115268557635, | |
| "grad_norm": 4.866819672714999, | |
| "learning_rate": 4.388014679495154e-06, | |
| "loss": 0.6832, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.30577348622007644, | |
| "grad_norm": 4.563239673265389, | |
| "learning_rate": 4.385711437918839e-06, | |
| "loss": 0.633, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.30617581975457653, | |
| "grad_norm": 3.952293727810083, | |
| "learning_rate": 4.383404477198976e-06, | |
| "loss": 0.4319, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.30657815328907667, | |
| "grad_norm": 3.8982970475676866, | |
| "learning_rate": 4.38109380188553e-06, | |
| "loss": 0.5412, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.30698048682357676, | |
| "grad_norm": 4.767355411844817, | |
| "learning_rate": 4.3787794165357875e-06, | |
| "loss": 0.6688, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.30738282035807685, | |
| "grad_norm": 5.490594878274815, | |
| "learning_rate": 4.3764613257143585e-06, | |
| "loss": 0.8315, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.30778515389257693, | |
| "grad_norm": 4.4753569929038965, | |
| "learning_rate": 4.374139533993157e-06, | |
| "loss": 0.5577, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.308187487427077, | |
| "grad_norm": 4.234373998218113, | |
| "learning_rate": 4.371814045951396e-06, | |
| "loss": 0.6195, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.30858982096157717, | |
| "grad_norm": 3.8789979804624277, | |
| "learning_rate": 4.369484866175581e-06, | |
| "loss": 0.5812, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.30899215449607725, | |
| "grad_norm": 4.3774657256617635, | |
| "learning_rate": 4.367151999259498e-06, | |
| "loss": 0.5334, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.30939448803057734, | |
| "grad_norm": 4.413468965671534, | |
| "learning_rate": 4.364815449804204e-06, | |
| "loss": 0.6043, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.30979682156507743, | |
| "grad_norm": 4.977248443532888, | |
| "learning_rate": 4.362475222418019e-06, | |
| "loss": 0.7427, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.3101991550995776, | |
| "grad_norm": 4.449181903975735, | |
| "learning_rate": 4.360131321716518e-06, | |
| "loss": 0.5336, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.31060148863407766, | |
| "grad_norm": 4.203059807958007, | |
| "learning_rate": 4.357783752322522e-06, | |
| "loss": 0.5362, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.31100382216857775, | |
| "grad_norm": 3.8919997316821004, | |
| "learning_rate": 4.355432518866084e-06, | |
| "loss": 0.5202, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.31140615570307784, | |
| "grad_norm": 4.125644261476737, | |
| "learning_rate": 4.353077625984484e-06, | |
| "loss": 0.5136, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.311808489237578, | |
| "grad_norm": 3.7230845852814687, | |
| "learning_rate": 4.3507190783222245e-06, | |
| "loss": 0.4731, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.31221082277207807, | |
| "grad_norm": 4.619811557245368, | |
| "learning_rate": 4.348356880531011e-06, | |
| "loss": 0.698, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.31261315630657815, | |
| "grad_norm": 4.064918048601741, | |
| "learning_rate": 4.345991037269748e-06, | |
| "loss": 0.5594, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.31301548984107824, | |
| "grad_norm": 4.28030910141664, | |
| "learning_rate": 4.343621553204533e-06, | |
| "loss": 0.6272, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.31341782337557833, | |
| "grad_norm": 4.3485146730819, | |
| "learning_rate": 4.341248433008645e-06, | |
| "loss": 0.6078, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.3138201569100785, | |
| "grad_norm": 4.211683329853448, | |
| "learning_rate": 4.338871681362528e-06, | |
| "loss": 0.5247, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.31422249044457856, | |
| "grad_norm": 3.8752448583148453, | |
| "learning_rate": 4.3364913029537946e-06, | |
| "loss": 0.5274, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.31462482397907865, | |
| "grad_norm": 3.999853155987491, | |
| "learning_rate": 4.334107302477208e-06, | |
| "loss": 0.5421, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.31502715751357874, | |
| "grad_norm": 4.961501923709214, | |
| "learning_rate": 4.331719684634676e-06, | |
| "loss": 0.7151, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.3154294910480789, | |
| "grad_norm": 4.148580969978077, | |
| "learning_rate": 4.3293284541352384e-06, | |
| "loss": 0.5084, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.31583182458257897, | |
| "grad_norm": 3.623114769478068, | |
| "learning_rate": 4.326933615695064e-06, | |
| "loss": 0.4912, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.31623415811707906, | |
| "grad_norm": 4.179951168808134, | |
| "learning_rate": 4.324535174037433e-06, | |
| "loss": 0.4943, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.31663649165157914, | |
| "grad_norm": 4.206035486231768, | |
| "learning_rate": 4.322133133892737e-06, | |
| "loss": 0.6175, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.3170388251860793, | |
| "grad_norm": 4.029670988995613, | |
| "learning_rate": 4.31972749999846e-06, | |
| "loss": 0.5329, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.3174411587205794, | |
| "grad_norm": 4.024639871547482, | |
| "learning_rate": 4.317318277099178e-06, | |
| "loss": 0.5121, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.31784349225507946, | |
| "grad_norm": 4.04730827392964, | |
| "learning_rate": 4.314905469946542e-06, | |
| "loss": 0.4979, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.31824582578957955, | |
| "grad_norm": 4.440133410808506, | |
| "learning_rate": 4.312489083299275e-06, | |
| "loss": 0.6333, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.31864815932407964, | |
| "grad_norm": 4.079663513764819, | |
| "learning_rate": 4.310069121923158e-06, | |
| "loss": 0.5987, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.3190504928585798, | |
| "grad_norm": 4.17692958034715, | |
| "learning_rate": 4.307645590591024e-06, | |
| "loss": 0.4798, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.31945282639307987, | |
| "grad_norm": 4.417314019145798, | |
| "learning_rate": 4.305218494082744e-06, | |
| "loss": 0.4832, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.31985515992757996, | |
| "grad_norm": 4.920563992053252, | |
| "learning_rate": 4.302787837185224e-06, | |
| "loss": 0.6288, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.32025749346208005, | |
| "grad_norm": 4.067969775601857, | |
| "learning_rate": 4.300353624692389e-06, | |
| "loss": 0.52, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.3206598269965802, | |
| "grad_norm": 4.232889302389981, | |
| "learning_rate": 4.2979158614051805e-06, | |
| "loss": 0.6205, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.3210621605310803, | |
| "grad_norm": 4.335177482388414, | |
| "learning_rate": 4.295474552131538e-06, | |
| "loss": 0.6153, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.32146449406558036, | |
| "grad_norm": 4.698659170570413, | |
| "learning_rate": 4.2930297016863985e-06, | |
| "loss": 0.6404, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.32186682760008045, | |
| "grad_norm": 3.931383036386479, | |
| "learning_rate": 4.2905813148916816e-06, | |
| "loss": 0.599, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.32226916113458054, | |
| "grad_norm": 5.048444025824699, | |
| "learning_rate": 4.288129396576284e-06, | |
| "loss": 0.7259, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.3226714946690807, | |
| "grad_norm": 4.851397090992265, | |
| "learning_rate": 4.285673951576062e-06, | |
| "loss": 0.642, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.32307382820358077, | |
| "grad_norm": 4.28645665558046, | |
| "learning_rate": 4.283214984733833e-06, | |
| "loss": 0.5315, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.32347616173808086, | |
| "grad_norm": 4.265366711136778, | |
| "learning_rate": 4.28075250089936e-06, | |
| "loss": 0.5529, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.32387849527258095, | |
| "grad_norm": 3.848835311590686, | |
| "learning_rate": 4.278286504929338e-06, | |
| "loss": 0.5323, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.3242808288070811, | |
| "grad_norm": 4.627843495385828, | |
| "learning_rate": 4.2758170016873934e-06, | |
| "loss": 0.5561, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.3246831623415812, | |
| "grad_norm": 4.363493591898399, | |
| "learning_rate": 4.273343996044068e-06, | |
| "loss": 0.4805, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.32508549587608127, | |
| "grad_norm": 3.9805290188441043, | |
| "learning_rate": 4.270867492876812e-06, | |
| "loss": 0.5204, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.32548782941058135, | |
| "grad_norm": 3.9644340799634588, | |
| "learning_rate": 4.268387497069974e-06, | |
| "loss": 0.4642, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.3258901629450815, | |
| "grad_norm": 5.269711583475643, | |
| "learning_rate": 4.265904013514788e-06, | |
| "loss": 0.6183, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.3262924964795816, | |
| "grad_norm": 4.6945618708498165, | |
| "learning_rate": 4.263417047109371e-06, | |
| "loss": 0.6567, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.32669483001408167, | |
| "grad_norm": 4.413878483629632, | |
| "learning_rate": 4.260926602758707e-06, | |
| "loss": 0.5518, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.32709716354858176, | |
| "grad_norm": 4.355371114130468, | |
| "learning_rate": 4.258432685374641e-06, | |
| "loss": 0.478, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.32749949708308185, | |
| "grad_norm": 4.625144876838143, | |
| "learning_rate": 4.255935299875864e-06, | |
| "loss": 0.5918, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.327901830617582, | |
| "grad_norm": 3.8722185488332994, | |
| "learning_rate": 4.253434451187911e-06, | |
| "loss": 0.4698, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.3283041641520821, | |
| "grad_norm": 3.8155166378719403, | |
| "learning_rate": 4.250930144243147e-06, | |
| "loss": 0.43, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.32870649768658217, | |
| "grad_norm": 4.373317134651141, | |
| "learning_rate": 4.248422383980756e-06, | |
| "loss": 0.581, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.32910883122108225, | |
| "grad_norm": 4.316760308023842, | |
| "learning_rate": 4.245911175346733e-06, | |
| "loss": 0.525, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.3295111647555824, | |
| "grad_norm": 4.1806833930239895, | |
| "learning_rate": 4.243396523293875e-06, | |
| "loss": 0.6027, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.3299134982900825, | |
| "grad_norm": 4.011770765526375, | |
| "learning_rate": 4.240878432781769e-06, | |
| "loss": 0.5642, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.3303158318245826, | |
| "grad_norm": 4.106735076414742, | |
| "learning_rate": 4.238356908776786e-06, | |
| "loss": 0.5576, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.33071816535908266, | |
| "grad_norm": 4.7608485825878955, | |
| "learning_rate": 4.235831956252069e-06, | |
| "loss": 0.7088, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.3311204988935828, | |
| "grad_norm": 4.8786607685170305, | |
| "learning_rate": 4.2333035801875175e-06, | |
| "loss": 0.6513, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.3315228324280829, | |
| "grad_norm": 4.288963783688971, | |
| "learning_rate": 4.230771785569791e-06, | |
| "loss": 0.5073, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.331925165962583, | |
| "grad_norm": 4.014903400829248, | |
| "learning_rate": 4.228236577392285e-06, | |
| "loss": 0.5901, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.33232749949708307, | |
| "grad_norm": 4.154812074980563, | |
| "learning_rate": 4.225697960655131e-06, | |
| "loss": 0.5568, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.33272983303158316, | |
| "grad_norm": 3.827755329803438, | |
| "learning_rate": 4.223155940365181e-06, | |
| "loss": 0.4738, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.3331321665660833, | |
| "grad_norm": 3.856077516315061, | |
| "learning_rate": 4.220610521536002e-06, | |
| "loss": 0.4963, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.3335345001005834, | |
| "grad_norm": 4.615466783911023, | |
| "learning_rate": 4.218061709187862e-06, | |
| "loss": 0.5656, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.3339368336350835, | |
| "grad_norm": 4.176966772191371, | |
| "learning_rate": 4.215509508347723e-06, | |
| "loss": 0.5398, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.33433916716958356, | |
| "grad_norm": 4.49327690032652, | |
| "learning_rate": 4.212953924049229e-06, | |
| "loss": 0.6772, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.3347415007040837, | |
| "grad_norm": 4.067266904294674, | |
| "learning_rate": 4.210394961332698e-06, | |
| "loss": 0.5311, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.3351438342385838, | |
| "grad_norm": 3.7375221543142128, | |
| "learning_rate": 4.207832625245112e-06, | |
| "loss": 0.4573, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.3355461677730839, | |
| "grad_norm": 4.056435688935082, | |
| "learning_rate": 4.205266920840103e-06, | |
| "loss": 0.543, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.33594850130758397, | |
| "grad_norm": 3.9887060748088254, | |
| "learning_rate": 4.202697853177951e-06, | |
| "loss": 0.4613, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.3363508348420841, | |
| "grad_norm": 3.5896338953258473, | |
| "learning_rate": 4.2001254273255646e-06, | |
| "loss": 0.4031, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.3367531683765842, | |
| "grad_norm": 4.094188015061212, | |
| "learning_rate": 4.197549648356478e-06, | |
| "loss": 0.5933, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.3371555019110843, | |
| "grad_norm": 3.6605198066957216, | |
| "learning_rate": 4.19497052135084e-06, | |
| "loss": 0.417, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.3375578354455844, | |
| "grad_norm": 5.538220960178179, | |
| "learning_rate": 4.192388051395398e-06, | |
| "loss": 0.7984, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.33796016898008446, | |
| "grad_norm": 4.543985654558515, | |
| "learning_rate": 4.1898022435835e-06, | |
| "loss": 0.5526, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.3383625025145846, | |
| "grad_norm": 3.719487495093868, | |
| "learning_rate": 4.187213103015069e-06, | |
| "loss": 0.4399, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.3387648360490847, | |
| "grad_norm": 3.958292027050775, | |
| "learning_rate": 4.184620634796608e-06, | |
| "loss": 0.4891, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.3391671695835848, | |
| "grad_norm": 4.219443448956219, | |
| "learning_rate": 4.182024844041177e-06, | |
| "loss": 0.6417, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.33956950311808487, | |
| "grad_norm": 4.637518704467857, | |
| "learning_rate": 4.179425735868395e-06, | |
| "loss": 0.5912, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.339971836652585, | |
| "grad_norm": 4.725084492132898, | |
| "learning_rate": 4.176823315404419e-06, | |
| "loss": 0.705, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.3403741701870851, | |
| "grad_norm": 4.081067015807228, | |
| "learning_rate": 4.17421758778194e-06, | |
| "loss": 0.5006, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.3407765037215852, | |
| "grad_norm": 4.431343567792434, | |
| "learning_rate": 4.1716085581401746e-06, | |
| "loss": 0.5081, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.3411788372560853, | |
| "grad_norm": 4.173696495740008, | |
| "learning_rate": 4.1689962316248475e-06, | |
| "loss": 0.497, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.3415811707905854, | |
| "grad_norm": 4.125681881515539, | |
| "learning_rate": 4.166380613388189e-06, | |
| "loss": 0.5498, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.3419835043250855, | |
| "grad_norm": 4.333217037647524, | |
| "learning_rate": 4.163761708588919e-06, | |
| "loss": 0.5597, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.3423858378595856, | |
| "grad_norm": 4.191848923537687, | |
| "learning_rate": 4.161139522392243e-06, | |
| "loss": 0.7259, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.3427881713940857, | |
| "grad_norm": 3.1772920188403164, | |
| "learning_rate": 4.158514059969834e-06, | |
| "loss": 0.4225, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.34319050492858577, | |
| "grad_norm": 4.166291911511059, | |
| "learning_rate": 4.15588532649983e-06, | |
| "loss": 0.6061, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.3435928384630859, | |
| "grad_norm": 3.5991395036430447, | |
| "learning_rate": 4.1532533271668175e-06, | |
| "loss": 0.461, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.343995171997586, | |
| "grad_norm": 4.29116994161306, | |
| "learning_rate": 4.150618067161828e-06, | |
| "loss": 0.567, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.3443975055320861, | |
| "grad_norm": 4.403897532608296, | |
| "learning_rate": 4.14797955168232e-06, | |
| "loss": 0.6118, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.3447998390665862, | |
| "grad_norm": 4.145689255622535, | |
| "learning_rate": 4.145337785932174e-06, | |
| "loss": 0.5709, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.3452021726010863, | |
| "grad_norm": 4.540600097537597, | |
| "learning_rate": 4.142692775121684e-06, | |
| "loss": 0.5506, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.3456045061355864, | |
| "grad_norm": 4.730037786980354, | |
| "learning_rate": 4.14004452446754e-06, | |
| "loss": 0.6088, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.3460068396700865, | |
| "grad_norm": 3.9409862740254487, | |
| "learning_rate": 4.137393039192822e-06, | |
| "loss": 0.5775, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.3464091732045866, | |
| "grad_norm": 4.113320460512886, | |
| "learning_rate": 4.1347383245269935e-06, | |
| "loss": 0.5695, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.34681150673908673, | |
| "grad_norm": 4.6445735426137364, | |
| "learning_rate": 4.1320803857058835e-06, | |
| "loss": 0.6158, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.3472138402735868, | |
| "grad_norm": 4.634455536740089, | |
| "learning_rate": 4.129419227971681e-06, | |
| "loss": 0.6799, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.3476161738080869, | |
| "grad_norm": 4.767706536948578, | |
| "learning_rate": 4.1267548565729235e-06, | |
| "loss": 0.5473, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.348018507342587, | |
| "grad_norm": 4.4478746327105405, | |
| "learning_rate": 4.124087276764488e-06, | |
| "loss": 0.6564, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.3484208408770871, | |
| "grad_norm": 5.1282545461378195, | |
| "learning_rate": 4.121416493807577e-06, | |
| "loss": 0.617, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.3488231744115872, | |
| "grad_norm": 3.850370414902996, | |
| "learning_rate": 4.118742512969713e-06, | |
| "loss": 0.5775, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.3492255079460873, | |
| "grad_norm": 4.66805214245803, | |
| "learning_rate": 4.116065339524724e-06, | |
| "loss": 0.6724, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.3496278414805874, | |
| "grad_norm": 4.7651508619199765, | |
| "learning_rate": 4.113384978752734e-06, | |
| "loss": 0.6151, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.3500301750150875, | |
| "grad_norm": 4.207593613220341, | |
| "learning_rate": 4.110701435940157e-06, | |
| "loss": 0.641, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.35043250854958763, | |
| "grad_norm": 4.3034195107515965, | |
| "learning_rate": 4.108014716379679e-06, | |
| "loss": 0.6542, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.3508348420840877, | |
| "grad_norm": 3.987719667248183, | |
| "learning_rate": 4.105324825370251e-06, | |
| "loss": 0.4996, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.3512371756185878, | |
| "grad_norm": 3.9071320373239824, | |
| "learning_rate": 4.102631768217083e-06, | |
| "loss": 0.5168, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.3516395091530879, | |
| "grad_norm": 4.2277706570506295, | |
| "learning_rate": 4.099935550231626e-06, | |
| "loss": 0.5379, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.35204184268758804, | |
| "grad_norm": 3.9154962483786373, | |
| "learning_rate": 4.097236176731567e-06, | |
| "loss": 0.5523, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.3524441762220881, | |
| "grad_norm": 3.465039425796171, | |
| "learning_rate": 4.094533653040814e-06, | |
| "loss": 0.4001, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.3528465097565882, | |
| "grad_norm": 3.4706674601181167, | |
| "learning_rate": 4.091827984489493e-06, | |
| "loss": 0.4629, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.3532488432910883, | |
| "grad_norm": 4.340590023749821, | |
| "learning_rate": 4.089119176413926e-06, | |
| "loss": 0.6815, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.3536511768255884, | |
| "grad_norm": 4.471466388864396, | |
| "learning_rate": 4.086407234156633e-06, | |
| "loss": 0.6459, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.35405351036008853, | |
| "grad_norm": 4.449019271914241, | |
| "learning_rate": 4.08369216306631e-06, | |
| "loss": 0.5592, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.3544558438945886, | |
| "grad_norm": 4.125369265434845, | |
| "learning_rate": 4.080973968497829e-06, | |
| "loss": 0.5138, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.3548581774290887, | |
| "grad_norm": 4.523305588736027, | |
| "learning_rate": 4.07825265581222e-06, | |
| "loss": 0.5929, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.3552605109635888, | |
| "grad_norm": 4.077469504247697, | |
| "learning_rate": 4.075528230376662e-06, | |
| "loss": 0.5288, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.35566284449808894, | |
| "grad_norm": 5.092003038039258, | |
| "learning_rate": 4.072800697564474e-06, | |
| "loss": 0.6624, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.356065178032589, | |
| "grad_norm": 3.7756634610021935, | |
| "learning_rate": 4.070070062755105e-06, | |
| "loss": 0.5265, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.3564675115670891, | |
| "grad_norm": 4.106660405022582, | |
| "learning_rate": 4.0673363313341195e-06, | |
| "loss": 0.4853, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.3568698451015892, | |
| "grad_norm": 4.215215996153993, | |
| "learning_rate": 4.064599508693191e-06, | |
| "loss": 0.5721, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.35727217863608934, | |
| "grad_norm": 4.171756065223583, | |
| "learning_rate": 4.06185960023009e-06, | |
| "loss": 0.4649, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.35767451217058943, | |
| "grad_norm": 5.138546885500691, | |
| "learning_rate": 4.05911661134867e-06, | |
| "loss": 0.5972, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.3580768457050895, | |
| "grad_norm": 4.136963664338033, | |
| "learning_rate": 4.0563705474588655e-06, | |
| "loss": 0.5027, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.3584791792395896, | |
| "grad_norm": 3.940446094705443, | |
| "learning_rate": 4.05362141397667e-06, | |
| "loss": 0.5627, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.3588815127740897, | |
| "grad_norm": 4.024692229149252, | |
| "learning_rate": 4.0508692163241356e-06, | |
| "loss": 0.5521, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.35928384630858984, | |
| "grad_norm": 3.866043251441755, | |
| "learning_rate": 4.048113959929354e-06, | |
| "loss": 0.5125, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.3596861798430899, | |
| "grad_norm": 4.786779622248551, | |
| "learning_rate": 4.0453556502264535e-06, | |
| "loss": 0.6475, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.36008851337759, | |
| "grad_norm": 4.127247200504385, | |
| "learning_rate": 4.042594292655581e-06, | |
| "loss": 0.4877, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.3604908469120901, | |
| "grad_norm": 3.3408217784719634, | |
| "learning_rate": 4.039829892662897e-06, | |
| "loss": 0.4337, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.36089318044659024, | |
| "grad_norm": 4.010232394860985, | |
| "learning_rate": 4.037062455700559e-06, | |
| "loss": 0.493, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.36129551398109033, | |
| "grad_norm": 4.3695516591433785, | |
| "learning_rate": 4.03429198722672e-06, | |
| "loss": 0.6051, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.3616978475155904, | |
| "grad_norm": 4.766410636581824, | |
| "learning_rate": 4.0315184927055065e-06, | |
| "loss": 0.6602, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.3621001810500905, | |
| "grad_norm": 4.1149436048907955, | |
| "learning_rate": 4.028741977607016e-06, | |
| "loss": 0.599, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.36250251458459065, | |
| "grad_norm": 3.4871089312720045, | |
| "learning_rate": 4.0259624474073025e-06, | |
| "loss": 0.4779, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.36290484811909074, | |
| "grad_norm": 3.6367752409912266, | |
| "learning_rate": 4.023179907588367e-06, | |
| "loss": 0.4541, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.3633071816535908, | |
| "grad_norm": 3.3961433962608685, | |
| "learning_rate": 4.020394363638147e-06, | |
| "loss": 0.4265, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.3637095151880909, | |
| "grad_norm": 3.8352646417492524, | |
| "learning_rate": 4.0176058210505045e-06, | |
| "loss": 0.5351, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.364111848722591, | |
| "grad_norm": 4.242220553627046, | |
| "learning_rate": 4.0148142853252125e-06, | |
| "loss": 0.5792, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.36451418225709115, | |
| "grad_norm": 4.096283695840529, | |
| "learning_rate": 4.0120197619679536e-06, | |
| "loss": 0.5412, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.36491651579159123, | |
| "grad_norm": 4.729251903541468, | |
| "learning_rate": 4.009222256490297e-06, | |
| "loss": 0.6285, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.3653188493260913, | |
| "grad_norm": 4.084180186714877, | |
| "learning_rate": 4.006421774409697e-06, | |
| "loss": 0.6304, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.3657211828605914, | |
| "grad_norm": 4.280731987602122, | |
| "learning_rate": 4.003618321249476e-06, | |
| "loss": 0.5632, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.36612351639509155, | |
| "grad_norm": 3.868764716611033, | |
| "learning_rate": 4.000811902538821e-06, | |
| "loss": 0.4715, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.36652584992959164, | |
| "grad_norm": 3.6445778686982155, | |
| "learning_rate": 3.99800252381276e-06, | |
| "loss": 0.4084, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.36692818346409173, | |
| "grad_norm": 4.600601862430362, | |
| "learning_rate": 3.995190190612165e-06, | |
| "loss": 0.7218, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.3673305169985918, | |
| "grad_norm": 4.09418361850337, | |
| "learning_rate": 3.992374908483735e-06, | |
| "loss": 0.5913, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.36773285053309196, | |
| "grad_norm": 4.495329343052196, | |
| "learning_rate": 3.9895566829799825e-06, | |
| "loss": 0.6735, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.36813518406759205, | |
| "grad_norm": 4.408452152812246, | |
| "learning_rate": 3.986735519659226e-06, | |
| "loss": 0.6896, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.36853751760209214, | |
| "grad_norm": 3.7084757585295276, | |
| "learning_rate": 3.983911424085578e-06, | |
| "loss": 0.552, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.3689398511365922, | |
| "grad_norm": 3.8368900456075985, | |
| "learning_rate": 3.981084401828937e-06, | |
| "loss": 0.4944, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.3693421846710923, | |
| "grad_norm": 5.696133375208026, | |
| "learning_rate": 3.978254458464969e-06, | |
| "loss": 0.6338, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.36974451820559245, | |
| "grad_norm": 4.144365741395642, | |
| "learning_rate": 3.975421599575103e-06, | |
| "loss": 0.6463, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.37014685174009254, | |
| "grad_norm": 3.7665583063095682, | |
| "learning_rate": 3.972585830746522e-06, | |
| "loss": 0.4442, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.37054918527459263, | |
| "grad_norm": 4.341604001350467, | |
| "learning_rate": 3.969747157572142e-06, | |
| "loss": 0.7191, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.3709515188090927, | |
| "grad_norm": 4.724527719836307, | |
| "learning_rate": 3.966905585650611e-06, | |
| "loss": 0.7508, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.37135385234359286, | |
| "grad_norm": 6.112644943798556, | |
| "learning_rate": 3.964061120586294e-06, | |
| "loss": 0.6744, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.37175618587809295, | |
| "grad_norm": 4.630742134072846, | |
| "learning_rate": 3.961213767989261e-06, | |
| "loss": 0.5703, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.37215851941259304, | |
| "grad_norm": 3.5499854666112802, | |
| "learning_rate": 3.958363533475277e-06, | |
| "loss": 0.4563, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.3725608529470931, | |
| "grad_norm": 3.926859330076321, | |
| "learning_rate": 3.955510422665791e-06, | |
| "loss": 0.5159, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.37296318648159327, | |
| "grad_norm": 4.029967110899967, | |
| "learning_rate": 3.952654441187927e-06, | |
| "loss": 0.5267, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.37336552001609336, | |
| "grad_norm": 3.4483709639926414, | |
| "learning_rate": 3.9497955946744675e-06, | |
| "loss": 0.4584, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.37376785355059344, | |
| "grad_norm": 4.752285945278346, | |
| "learning_rate": 3.946933888763847e-06, | |
| "loss": 0.5743, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.37417018708509353, | |
| "grad_norm": 3.985801543832948, | |
| "learning_rate": 3.9440693291001406e-06, | |
| "loss": 0.5319, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.3745725206195936, | |
| "grad_norm": 3.5946794819227903, | |
| "learning_rate": 3.941201921333048e-06, | |
| "loss": 0.4217, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.37497485415409376, | |
| "grad_norm": 4.035357970865375, | |
| "learning_rate": 3.9383316711178946e-06, | |
| "loss": 0.503, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.37537718768859385, | |
| "grad_norm": 4.034839036975055, | |
| "learning_rate": 3.935458584115599e-06, | |
| "loss": 0.4698, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.37577952122309394, | |
| "grad_norm": 4.286963186422666, | |
| "learning_rate": 3.932582665992688e-06, | |
| "loss": 0.5901, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.376181854757594, | |
| "grad_norm": 4.021909925329209, | |
| "learning_rate": 3.929703922421263e-06, | |
| "loss": 0.5111, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.37658418829209417, | |
| "grad_norm": 4.0942098752342035, | |
| "learning_rate": 3.926822359079001e-06, | |
| "loss": 0.5472, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.37698652182659426, | |
| "grad_norm": 4.261504012145757, | |
| "learning_rate": 3.923937981649143e-06, | |
| "loss": 0.6453, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.37738885536109434, | |
| "grad_norm": 4.014117574115026, | |
| "learning_rate": 3.921050795820474e-06, | |
| "loss": 0.5056, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.37779118889559443, | |
| "grad_norm": 3.904726379027022, | |
| "learning_rate": 3.9181608072873244e-06, | |
| "loss": 0.5304, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.3781935224300946, | |
| "grad_norm": 3.881019901522325, | |
| "learning_rate": 3.9152680217495475e-06, | |
| "loss": 0.5298, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.37859585596459466, | |
| "grad_norm": 4.058485666284913, | |
| "learning_rate": 3.912372444912517e-06, | |
| "loss": 0.574, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.37899818949909475, | |
| "grad_norm": 3.9630383581476205, | |
| "learning_rate": 3.909474082487107e-06, | |
| "loss": 0.5804, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.37940052303359484, | |
| "grad_norm": 3.8676063595588683, | |
| "learning_rate": 3.906572940189691e-06, | |
| "loss": 0.4804, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.3798028565680949, | |
| "grad_norm": 4.32834774261345, | |
| "learning_rate": 3.9036690237421215e-06, | |
| "loss": 0.6161, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.38020519010259507, | |
| "grad_norm": 4.726002477400136, | |
| "learning_rate": 3.900762338871723e-06, | |
| "loss": 0.6898, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.38060752363709516, | |
| "grad_norm": 4.577558255257823, | |
| "learning_rate": 3.897852891311282e-06, | |
| "loss": 0.5504, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.38100985717159525, | |
| "grad_norm": 4.300878005170684, | |
| "learning_rate": 3.89494068679903e-06, | |
| "loss": 0.6734, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.38141219070609533, | |
| "grad_norm": 4.519778162808581, | |
| "learning_rate": 3.892025731078641e-06, | |
| "loss": 0.5694, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.3818145242405955, | |
| "grad_norm": 3.8176275411987683, | |
| "learning_rate": 3.88910802989921e-06, | |
| "loss": 0.4444, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.38221685777509556, | |
| "grad_norm": 4.014909553832162, | |
| "learning_rate": 3.886187589015251e-06, | |
| "loss": 0.6054, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.38261919130959565, | |
| "grad_norm": 4.267781377287406, | |
| "learning_rate": 3.883264414186677e-06, | |
| "loss": 0.6257, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.38302152484409574, | |
| "grad_norm": 4.345044782459552, | |
| "learning_rate": 3.8803385111788e-06, | |
| "loss": 0.5543, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.3834238583785959, | |
| "grad_norm": 4.221128544535545, | |
| "learning_rate": 3.877409885762305e-06, | |
| "loss": 0.5544, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.38382619191309597, | |
| "grad_norm": 4.000060887768021, | |
| "learning_rate": 3.8744785437132506e-06, | |
| "loss": 0.5878, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.38422852544759606, | |
| "grad_norm": 3.8372993108948488, | |
| "learning_rate": 3.871544490813054e-06, | |
| "loss": 0.5076, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.38463085898209615, | |
| "grad_norm": 4.059706126497868, | |
| "learning_rate": 3.868607732848475e-06, | |
| "loss": 0.5353, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.38503319251659623, | |
| "grad_norm": 4.408924474844376, | |
| "learning_rate": 3.865668275611614e-06, | |
| "loss": 0.519, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.3854355260510964, | |
| "grad_norm": 4.010089079135815, | |
| "learning_rate": 3.862726124899889e-06, | |
| "loss": 0.5952, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.38583785958559647, | |
| "grad_norm": 5.459071377748317, | |
| "learning_rate": 3.859781286516037e-06, | |
| "loss": 0.7175, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.38624019312009655, | |
| "grad_norm": 4.542741280511702, | |
| "learning_rate": 3.85683376626809e-06, | |
| "loss": 0.5667, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.38664252665459664, | |
| "grad_norm": 3.99184341470565, | |
| "learning_rate": 3.853883569969372e-06, | |
| "loss": 0.6148, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.3870448601890968, | |
| "grad_norm": 4.078873510656604, | |
| "learning_rate": 3.850930703438484e-06, | |
| "loss": 0.5383, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.38744719372359687, | |
| "grad_norm": 3.896065226196885, | |
| "learning_rate": 3.847975172499295e-06, | |
| "loss": 0.6785, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.38784952725809696, | |
| "grad_norm": 4.328062559714918, | |
| "learning_rate": 3.845016982980929e-06, | |
| "loss": 0.5543, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.38825186079259705, | |
| "grad_norm": 4.2981177827507935, | |
| "learning_rate": 3.842056140717749e-06, | |
| "loss": 0.6935, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.3886541943270972, | |
| "grad_norm": 4.489409385825919, | |
| "learning_rate": 3.839092651549357e-06, | |
| "loss": 0.6412, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.3890565278615973, | |
| "grad_norm": 4.162311148181462, | |
| "learning_rate": 3.836126521320569e-06, | |
| "loss": 0.5133, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.38945886139609737, | |
| "grad_norm": 3.7564756341509464, | |
| "learning_rate": 3.833157755881414e-06, | |
| "loss": 0.5198, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.38986119493059745, | |
| "grad_norm": 4.2353823981014385, | |
| "learning_rate": 3.830186361087117e-06, | |
| "loss": 0.4271, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.39026352846509754, | |
| "grad_norm": 3.4858668216855437, | |
| "learning_rate": 3.827212342798089e-06, | |
| "loss": 0.4654, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.3906658619995977, | |
| "grad_norm": 3.920259549785761, | |
| "learning_rate": 3.824235706879915e-06, | |
| "loss": 0.5822, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.3910681955340978, | |
| "grad_norm": 4.239371514940848, | |
| "learning_rate": 3.821256459203343e-06, | |
| "loss": 0.5093, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.39147052906859786, | |
| "grad_norm": 4.037277711642943, | |
| "learning_rate": 3.81827460564427e-06, | |
| "loss": 0.5222, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.39187286260309795, | |
| "grad_norm": 4.456827976048942, | |
| "learning_rate": 3.815290152083737e-06, | |
| "loss": 0.6783, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.3922751961375981, | |
| "grad_norm": 4.246220187755789, | |
| "learning_rate": 3.8123031044079084e-06, | |
| "loss": 0.5888, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.3926775296720982, | |
| "grad_norm": 4.3203808668287635, | |
| "learning_rate": 3.809313468508068e-06, | |
| "loss": 0.5569, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.39307986320659827, | |
| "grad_norm": 4.860143257226567, | |
| "learning_rate": 3.8063212502806035e-06, | |
| "loss": 0.5953, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.39348219674109836, | |
| "grad_norm": 4.233378915892834, | |
| "learning_rate": 3.8033264556269954e-06, | |
| "loss": 0.6167, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.39388453027559844, | |
| "grad_norm": 4.3534789683754305, | |
| "learning_rate": 3.8003290904538035e-06, | |
| "loss": 0.6133, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.3942868638100986, | |
| "grad_norm": 3.9519882332598257, | |
| "learning_rate": 3.7973291606726614e-06, | |
| "loss": 0.4515, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.3946891973445987, | |
| "grad_norm": 4.138408229253922, | |
| "learning_rate": 3.794326672200258e-06, | |
| "loss": 0.5607, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.39509153087909876, | |
| "grad_norm": 4.3761985221008635, | |
| "learning_rate": 3.79132163095833e-06, | |
| "loss": 0.5507, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.39549386441359885, | |
| "grad_norm": 4.023180334056979, | |
| "learning_rate": 3.7883140428736477e-06, | |
| "loss": 0.5314, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.395896197948099, | |
| "grad_norm": 4.151232898341051, | |
| "learning_rate": 3.7853039138780057e-06, | |
| "loss": 0.5392, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.3962985314825991, | |
| "grad_norm": 4.539638233125882, | |
| "learning_rate": 3.7822912499082087e-06, | |
| "loss": 0.5791, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.39670086501709917, | |
| "grad_norm": 4.335330132423181, | |
| "learning_rate": 3.7792760569060626e-06, | |
| "loss": 0.5178, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.39710319855159926, | |
| "grad_norm": 4.143033413053681, | |
| "learning_rate": 3.7762583408183594e-06, | |
| "loss": 0.5815, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.3975055320860994, | |
| "grad_norm": 4.915299915525361, | |
| "learning_rate": 3.7732381075968694e-06, | |
| "loss": 0.6269, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.3979078656205995, | |
| "grad_norm": 3.7212476195736506, | |
| "learning_rate": 3.770215363198325e-06, | |
| "loss": 0.4599, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.3983101991550996, | |
| "grad_norm": 3.9977745943956884, | |
| "learning_rate": 3.7671901135844148e-06, | |
| "loss": 0.5973, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.39871253268959966, | |
| "grad_norm": 4.3088468446821615, | |
| "learning_rate": 3.764162364721764e-06, | |
| "loss": 0.5963, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.39911486622409975, | |
| "grad_norm": 4.2991674025596955, | |
| "learning_rate": 3.761132122581931e-06, | |
| "loss": 0.6923, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.3995171997585999, | |
| "grad_norm": 3.952746214125807, | |
| "learning_rate": 3.758099393141388e-06, | |
| "loss": 0.5384, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.3999195332931, | |
| "grad_norm": 4.327315800519712, | |
| "learning_rate": 3.755064182381516e-06, | |
| "loss": 0.5144, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.40032186682760007, | |
| "grad_norm": 4.713412610694241, | |
| "learning_rate": 3.752026496288588e-06, | |
| "loss": 0.641, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.40072420036210016, | |
| "grad_norm": 3.9912240405323507, | |
| "learning_rate": 3.74898634085376e-06, | |
| "loss": 0.5646, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.4011265338966003, | |
| "grad_norm": 4.530973641905437, | |
| "learning_rate": 3.7459437220730583e-06, | |
| "loss": 0.5614, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.4015288674311004, | |
| "grad_norm": 6.864543852090396, | |
| "learning_rate": 3.742898645947366e-06, | |
| "loss": 0.5336, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.4019312009656005, | |
| "grad_norm": 4.429956976671279, | |
| "learning_rate": 3.7398511184824144e-06, | |
| "loss": 0.7004, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.40233353450010056, | |
| "grad_norm": 3.164463326468864, | |
| "learning_rate": 3.7368011456887693e-06, | |
| "loss": 0.4163, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.4027358680346007, | |
| "grad_norm": 3.7984416296897456, | |
| "learning_rate": 3.7337487335818185e-06, | |
| "loss": 0.5342, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.4031382015691008, | |
| "grad_norm": 4.186189188641626, | |
| "learning_rate": 3.730693888181761e-06, | |
| "loss": 0.6125, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.4035405351036009, | |
| "grad_norm": 3.8782636909404506, | |
| "learning_rate": 3.7276366155135968e-06, | |
| "loss": 0.5669, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.40394286863810097, | |
| "grad_norm": 4.222464351010827, | |
| "learning_rate": 3.7245769216071104e-06, | |
| "loss": 0.5574, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.40434520217260106, | |
| "grad_norm": 4.0429197887930135, | |
| "learning_rate": 3.721514812496863e-06, | |
| "loss": 0.5031, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.4047475357071012, | |
| "grad_norm": 4.015964628315664, | |
| "learning_rate": 3.718450294222179e-06, | |
| "loss": 0.5994, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.4051498692416013, | |
| "grad_norm": 3.846521589103569, | |
| "learning_rate": 3.7153833728271356e-06, | |
| "loss": 0.4928, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.4055522027761014, | |
| "grad_norm": 4.138445980201307, | |
| "learning_rate": 3.712314054360547e-06, | |
| "loss": 0.5722, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.40595453631060147, | |
| "grad_norm": 4.476195845673172, | |
| "learning_rate": 3.7092423448759577e-06, | |
| "loss": 0.563, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.4063568698451016, | |
| "grad_norm": 3.786638148470317, | |
| "learning_rate": 3.7061682504316264e-06, | |
| "loss": 0.5181, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.4067592033796017, | |
| "grad_norm": 4.563364096910093, | |
| "learning_rate": 3.7030917770905155e-06, | |
| "loss": 0.5247, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.4071615369141018, | |
| "grad_norm": 4.276417493626447, | |
| "learning_rate": 3.7000129309202804e-06, | |
| "loss": 0.5815, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.4075638704486019, | |
| "grad_norm": 4.495474983317033, | |
| "learning_rate": 3.696931717993256e-06, | |
| "loss": 0.7269, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.407966203983102, | |
| "grad_norm": 3.6238415974427527, | |
| "learning_rate": 3.693848144386444e-06, | |
| "loss": 0.5462, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.4083685375176021, | |
| "grad_norm": 4.262996632479673, | |
| "learning_rate": 3.6907622161815027e-06, | |
| "loss": 0.64, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.4087708710521022, | |
| "grad_norm": 3.8744140465414745, | |
| "learning_rate": 3.687673939464736e-06, | |
| "loss": 0.5281, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.4091732045866023, | |
| "grad_norm": 4.021193749407921, | |
| "learning_rate": 3.6845833203270766e-06, | |
| "loss": 0.5238, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.40957553812110237, | |
| "grad_norm": 3.7942462225606084, | |
| "learning_rate": 3.6814903648640787e-06, | |
| "loss": 0.5274, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.4099778716556025, | |
| "grad_norm": 3.8584763272892193, | |
| "learning_rate": 3.6783950791759053e-06, | |
| "loss": 0.6652, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.4103802051901026, | |
| "grad_norm": 4.224914273954211, | |
| "learning_rate": 3.6752974693673144e-06, | |
| "loss": 0.6418, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.4107825387246027, | |
| "grad_norm": 3.611777243329413, | |
| "learning_rate": 3.6721975415476475e-06, | |
| "loss": 0.4478, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.4111848722591028, | |
| "grad_norm": 4.492308490256186, | |
| "learning_rate": 3.669095301830816e-06, | |
| "loss": 0.6383, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.4115872057936029, | |
| "grad_norm": 3.966328790708119, | |
| "learning_rate": 3.6659907563352964e-06, | |
| "loss": 0.5496, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.411989539328103, | |
| "grad_norm": 4.331090241917638, | |
| "learning_rate": 3.6628839111841067e-06, | |
| "loss": 0.6138, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.4123918728626031, | |
| "grad_norm": 4.039380276956274, | |
| "learning_rate": 3.6597747725048054e-06, | |
| "loss": 0.514, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.4127942063971032, | |
| "grad_norm": 4.2678553912438515, | |
| "learning_rate": 3.656663346429471e-06, | |
| "loss": 0.5304, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.4131965399316033, | |
| "grad_norm": 3.6311274537700444, | |
| "learning_rate": 3.6535496390946958e-06, | |
| "loss": 0.5324, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.4135988734661034, | |
| "grad_norm": 4.179608207864191, | |
| "learning_rate": 3.650433656641569e-06, | |
| "loss": 0.5913, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.4140012070006035, | |
| "grad_norm": 3.8042489733382308, | |
| "learning_rate": 3.647315405215668e-06, | |
| "loss": 0.5007, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.4144035405351036, | |
| "grad_norm": 3.4526666984193404, | |
| "learning_rate": 3.644194890967047e-06, | |
| "loss": 0.4141, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.4148058740696037, | |
| "grad_norm": 4.657500083163389, | |
| "learning_rate": 3.641072120050221e-06, | |
| "loss": 0.6175, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.4152082076041038, | |
| "grad_norm": 3.5394692246958934, | |
| "learning_rate": 3.637947098624156e-06, | |
| "loss": 0.493, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.4156105411386039, | |
| "grad_norm": 3.6382852897066003, | |
| "learning_rate": 3.6348198328522565e-06, | |
| "loss": 0.4733, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.416012874673104, | |
| "grad_norm": 4.701593231983809, | |
| "learning_rate": 3.631690328902355e-06, | |
| "loss": 0.6759, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.4164152082076041, | |
| "grad_norm": 4.126519542614592, | |
| "learning_rate": 3.6285585929466982e-06, | |
| "loss": 0.6065, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.4168175417421042, | |
| "grad_norm": 4.174303064525279, | |
| "learning_rate": 3.6254246311619317e-06, | |
| "loss": 0.6019, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.4172198752766043, | |
| "grad_norm": 4.369071505943041, | |
| "learning_rate": 3.6222884497290937e-06, | |
| "loss": 0.47, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.4176222088111044, | |
| "grad_norm": 4.547283701461554, | |
| "learning_rate": 3.619150054833601e-06, | |
| "loss": 0.5666, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.4180245423456045, | |
| "grad_norm": 6.593645962994248, | |
| "learning_rate": 3.616009452665234e-06, | |
| "loss": 0.4854, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.41842687588010463, | |
| "grad_norm": 4.3350300541777145, | |
| "learning_rate": 3.6128666494181274e-06, | |
| "loss": 0.6522, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.4188292094146047, | |
| "grad_norm": 3.4916298319626233, | |
| "learning_rate": 3.6097216512907574e-06, | |
| "loss": 0.5449, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.4192315429491048, | |
| "grad_norm": 3.721480691979447, | |
| "learning_rate": 3.6065744644859276e-06, | |
| "loss": 0.4152, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.4196338764836049, | |
| "grad_norm": 4.419075622535455, | |
| "learning_rate": 3.6034250952107598e-06, | |
| "loss": 0.5998, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.420036210018105, | |
| "grad_norm": 4.066379360835346, | |
| "learning_rate": 3.6002735496766787e-06, | |
| "loss": 0.4443, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.4204385435526051, | |
| "grad_norm": 5.277931637633872, | |
| "learning_rate": 3.5971198340994035e-06, | |
| "loss": 0.6578, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.4208408770871052, | |
| "grad_norm": 4.171317951485179, | |
| "learning_rate": 3.5939639546989315e-06, | |
| "loss": 0.5362, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.4212432106216053, | |
| "grad_norm": 3.6581061080480786, | |
| "learning_rate": 3.5908059176995274e-06, | |
| "loss": 0.4868, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.4216455441561054, | |
| "grad_norm": 4.089304060454962, | |
| "learning_rate": 3.587645729329713e-06, | |
| "loss": 0.4962, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.42204787769060553, | |
| "grad_norm": 4.193929975116655, | |
| "learning_rate": 3.584483395822252e-06, | |
| "loss": 0.585, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.4224502112251056, | |
| "grad_norm": 4.4833727752476324, | |
| "learning_rate": 3.5813189234141386e-06, | |
| "loss": 0.5295, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.4228525447596057, | |
| "grad_norm": 4.131740789977084, | |
| "learning_rate": 3.5781523183465868e-06, | |
| "loss": 0.5408, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.4232548782941058, | |
| "grad_norm": 3.972093255632944, | |
| "learning_rate": 3.574983586865015e-06, | |
| "loss": 0.5256, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.42365721182860594, | |
| "grad_norm": 4.619320510735413, | |
| "learning_rate": 3.571812735219037e-06, | |
| "loss": 0.6365, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.424059545363106, | |
| "grad_norm": 4.543758819054478, | |
| "learning_rate": 3.5686397696624486e-06, | |
| "loss": 0.5358, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.4244618788976061, | |
| "grad_norm": 3.9488894694458305, | |
| "learning_rate": 3.565464696453212e-06, | |
| "loss": 0.5101, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.4248642124321062, | |
| "grad_norm": 3.9008766737779217, | |
| "learning_rate": 3.5622875218534493e-06, | |
| "loss": 0.6157, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.4252665459666063, | |
| "grad_norm": 4.072195384389267, | |
| "learning_rate": 3.5591082521294264e-06, | |
| "loss": 0.572, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.42566887950110643, | |
| "grad_norm": 3.7407260188563622, | |
| "learning_rate": 3.555926893551539e-06, | |
| "loss": 0.4571, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.4260712130356065, | |
| "grad_norm": 4.030808733877317, | |
| "learning_rate": 3.552743452394306e-06, | |
| "loss": 0.5615, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.4264735465701066, | |
| "grad_norm": 3.833659387273303, | |
| "learning_rate": 3.5495579349363517e-06, | |
| "loss": 0.4978, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.4268758801046067, | |
| "grad_norm": 4.304988237070821, | |
| "learning_rate": 3.5463703474603964e-06, | |
| "loss": 0.6441, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.42727821363910684, | |
| "grad_norm": 4.68914366889499, | |
| "learning_rate": 3.5431806962532426e-06, | |
| "loss": 0.5009, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.42768054717360693, | |
| "grad_norm": 4.208417431491987, | |
| "learning_rate": 3.539988987605763e-06, | |
| "loss": 0.5556, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.428082880708107, | |
| "grad_norm": 3.913021062753838, | |
| "learning_rate": 3.5367952278128877e-06, | |
| "loss": 0.3997, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.4284852142426071, | |
| "grad_norm": 3.975035687293758, | |
| "learning_rate": 3.5335994231735927e-06, | |
| "loss": 0.5394, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.42888754777710725, | |
| "grad_norm": 3.969954958131227, | |
| "learning_rate": 3.5304015799908875e-06, | |
| "loss": 0.579, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.42928988131160734, | |
| "grad_norm": 4.942462188783348, | |
| "learning_rate": 3.5272017045718016e-06, | |
| "loss": 0.6723, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.4296922148461074, | |
| "grad_norm": 4.354492013681113, | |
| "learning_rate": 3.5239998032273727e-06, | |
| "loss": 0.5724, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.4300945483806075, | |
| "grad_norm": 4.275789917585218, | |
| "learning_rate": 3.520795882272634e-06, | |
| "loss": 0.5592, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.4304968819151076, | |
| "grad_norm": 4.0220453706013535, | |
| "learning_rate": 3.5175899480266023e-06, | |
| "loss": 0.6004, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.43089921544960774, | |
| "grad_norm": 3.956820759857449, | |
| "learning_rate": 3.5143820068122647e-06, | |
| "loss": 0.4667, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.43130154898410783, | |
| "grad_norm": 3.9359268522062996, | |
| "learning_rate": 3.5111720649565685e-06, | |
| "loss": 0.4766, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.4317038825186079, | |
| "grad_norm": 3.9665074015294555, | |
| "learning_rate": 3.507960128790402e-06, | |
| "loss": 0.5496, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.432106216053108, | |
| "grad_norm": 4.086635346156316, | |
| "learning_rate": 3.5047462046485934e-06, | |
| "loss": 0.4863, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.43250854958760815, | |
| "grad_norm": 4.234886651798155, | |
| "learning_rate": 3.501530298869886e-06, | |
| "loss": 0.5373, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.43291088312210824, | |
| "grad_norm": 4.578190826029045, | |
| "learning_rate": 3.4983124177969353e-06, | |
| "loss": 0.6952, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.4333132166566083, | |
| "grad_norm": 4.198352580671215, | |
| "learning_rate": 3.495092567776291e-06, | |
| "loss": 0.6753, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.4337155501911084, | |
| "grad_norm": 4.25314346947828, | |
| "learning_rate": 3.4918707551583853e-06, | |
| "loss": 0.6139, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.43411788372560856, | |
| "grad_norm": 3.7809774120139084, | |
| "learning_rate": 3.4886469862975224e-06, | |
| "loss": 0.5401, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.43452021726010864, | |
| "grad_norm": 3.861747118430867, | |
| "learning_rate": 3.485421267551865e-06, | |
| "loss": 0.4343, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.43492255079460873, | |
| "grad_norm": 4.1971689225974655, | |
| "learning_rate": 3.482193605283421e-06, | |
| "loss": 0.4651, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.4353248843291088, | |
| "grad_norm": 4.719733540576249, | |
| "learning_rate": 3.478964005858031e-06, | |
| "loss": 0.6265, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.4357272178636089, | |
| "grad_norm": 4.199121895188385, | |
| "learning_rate": 3.475732475645357e-06, | |
| "loss": 0.5915, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.43612955139810905, | |
| "grad_norm": 4.117901413415391, | |
| "learning_rate": 3.4724990210188693e-06, | |
| "loss": 0.5859, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.43653188493260914, | |
| "grad_norm": 4.063314519073365, | |
| "learning_rate": 3.4692636483558316e-06, | |
| "loss": 0.5799, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.4369342184671092, | |
| "grad_norm": 4.291259902841789, | |
| "learning_rate": 3.4660263640372926e-06, | |
| "loss": 0.5972, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.4373365520016093, | |
| "grad_norm": 4.317671430467203, | |
| "learning_rate": 3.462787174448071e-06, | |
| "loss": 0.5818, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.43773888553610946, | |
| "grad_norm": 4.041367856828536, | |
| "learning_rate": 3.459546085976743e-06, | |
| "loss": 0.5635, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.43814121907060954, | |
| "grad_norm": 5.295724094479603, | |
| "learning_rate": 3.45630310501563e-06, | |
| "loss": 0.6712, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.43854355260510963, | |
| "grad_norm": 4.415113705137869, | |
| "learning_rate": 3.453058237960785e-06, | |
| "loss": 0.578, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.4389458861396097, | |
| "grad_norm": 4.107191353105501, | |
| "learning_rate": 3.4498114912119817e-06, | |
| "loss": 0.5199, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.43934821967410986, | |
| "grad_norm": 4.1779462968909336, | |
| "learning_rate": 3.4465628711727022e-06, | |
| "loss": 0.577, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.43975055320860995, | |
| "grad_norm": 3.779499327289251, | |
| "learning_rate": 3.4433123842501205e-06, | |
| "loss": 0.4508, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.44015288674311004, | |
| "grad_norm": 3.9872787051249077, | |
| "learning_rate": 3.440060036855095e-06, | |
| "loss": 0.5583, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.4405552202776101, | |
| "grad_norm": 4.317372927534622, | |
| "learning_rate": 3.4368058354021526e-06, | |
| "loss": 0.5136, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.4409575538121102, | |
| "grad_norm": 3.7148972800031053, | |
| "learning_rate": 3.4335497863094773e-06, | |
| "loss": 0.514, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.44135988734661036, | |
| "grad_norm": 4.342233656787418, | |
| "learning_rate": 3.430291895998896e-06, | |
| "loss": 0.6742, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.44176222088111045, | |
| "grad_norm": 4.799719138290185, | |
| "learning_rate": 3.4270321708958686e-06, | |
| "loss": 0.6306, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.44216455441561053, | |
| "grad_norm": 3.9209860725209382, | |
| "learning_rate": 3.4237706174294726e-06, | |
| "loss": 0.5836, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.4425668879501106, | |
| "grad_norm": 4.127404724187905, | |
| "learning_rate": 3.420507242032392e-06, | |
| "loss": 0.5628, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.44296922148461076, | |
| "grad_norm": 4.956274683013535, | |
| "learning_rate": 3.4172420511409033e-06, | |
| "loss": 0.5366, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.44337155501911085, | |
| "grad_norm": 3.9527811079754285, | |
| "learning_rate": 3.413975051194865e-06, | |
| "loss": 0.4531, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.44377388855361094, | |
| "grad_norm": 4.364141896413706, | |
| "learning_rate": 3.4107062486377028e-06, | |
| "loss": 0.4914, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.44417622208811103, | |
| "grad_norm": 4.349108898092637, | |
| "learning_rate": 3.4074356499163976e-06, | |
| "loss": 0.7239, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.44457855562261117, | |
| "grad_norm": 4.038974541606429, | |
| "learning_rate": 3.4041632614814734e-06, | |
| "loss": 0.6375, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.44498088915711126, | |
| "grad_norm": 4.811612078810641, | |
| "learning_rate": 3.400889089786984e-06, | |
| "loss": 0.4963, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.44538322269161135, | |
| "grad_norm": 4.07473806896573, | |
| "learning_rate": 3.397613141290499e-06, | |
| "loss": 0.575, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.44578555622611143, | |
| "grad_norm": 4.160302353185681, | |
| "learning_rate": 3.3943354224530934e-06, | |
| "loss": 0.4766, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.4461878897606115, | |
| "grad_norm": 3.6900007830747934, | |
| "learning_rate": 3.3910559397393335e-06, | |
| "loss": 0.5544, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.44659022329511167, | |
| "grad_norm": 4.333978444807443, | |
| "learning_rate": 3.3877746996172645e-06, | |
| "loss": 0.5637, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.44699255682961175, | |
| "grad_norm": 3.768061403793603, | |
| "learning_rate": 3.384491708558399e-06, | |
| "loss": 0.4659, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.44739489036411184, | |
| "grad_norm": 3.9713683928936208, | |
| "learning_rate": 3.3812069730377007e-06, | |
| "loss": 0.539, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.44779722389861193, | |
| "grad_norm": 4.95129840549997, | |
| "learning_rate": 3.3779204995335747e-06, | |
| "loss": 0.6069, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.4481995574331121, | |
| "grad_norm": 4.316287155035261, | |
| "learning_rate": 3.3746322945278543e-06, | |
| "loss": 0.66, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.44860189096761216, | |
| "grad_norm": 4.697629623928124, | |
| "learning_rate": 3.3713423645057873e-06, | |
| "loss": 0.4885, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.44900422450211225, | |
| "grad_norm": 3.920242841924085, | |
| "learning_rate": 3.368050715956025e-06, | |
| "loss": 0.5213, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.44940655803661234, | |
| "grad_norm": 4.521566910606868, | |
| "learning_rate": 3.3647573553706052e-06, | |
| "loss": 0.5743, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 0.4498088915711125, | |
| "grad_norm": 4.082210637179168, | |
| "learning_rate": 3.3614622892449465e-06, | |
| "loss": 0.5361, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.45021122510561257, | |
| "grad_norm": 4.017855379880608, | |
| "learning_rate": 3.3581655240778277e-06, | |
| "loss": 0.5492, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.45061355864011265, | |
| "grad_norm": 3.3069156613524577, | |
| "learning_rate": 3.35486706637138e-06, | |
| "loss": 0.4142, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.45101589217461274, | |
| "grad_norm": 4.230766496789279, | |
| "learning_rate": 3.3515669226310716e-06, | |
| "loss": 0.5883, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.45141822570911283, | |
| "grad_norm": 3.932193721134766, | |
| "learning_rate": 3.3482650993656983e-06, | |
| "loss": 0.4732, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.451820559243613, | |
| "grad_norm": 3.9297564952360577, | |
| "learning_rate": 3.344961603087367e-06, | |
| "loss": 0.5852, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.45222289277811306, | |
| "grad_norm": 4.909401223605511, | |
| "learning_rate": 3.341656440311484e-06, | |
| "loss": 0.5709, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.45262522631261315, | |
| "grad_norm": 3.6889862040375525, | |
| "learning_rate": 3.3383496175567435e-06, | |
| "loss": 0.5966, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.45302755984711324, | |
| "grad_norm": 3.4699341807873694, | |
| "learning_rate": 3.3350411413451125e-06, | |
| "loss": 0.4692, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.4534298933816134, | |
| "grad_norm": 4.481843121210536, | |
| "learning_rate": 3.3317310182018186e-06, | |
| "loss": 0.6374, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.45383222691611347, | |
| "grad_norm": 4.674277827776204, | |
| "learning_rate": 3.328419254655339e-06, | |
| "loss": 0.7174, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.45423456045061356, | |
| "grad_norm": 3.8262305960434575, | |
| "learning_rate": 3.325105857237386e-06, | |
| "loss": 0.4618, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 0.45463689398511364, | |
| "grad_norm": 3.9899678016482305, | |
| "learning_rate": 3.3217908324828942e-06, | |
| "loss": 0.5839, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.4550392275196138, | |
| "grad_norm": 3.39198458807276, | |
| "learning_rate": 3.3184741869300073e-06, | |
| "loss": 0.5191, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.4554415610541139, | |
| "grad_norm": 4.032286065452944, | |
| "learning_rate": 3.3151559271200656e-06, | |
| "loss": 0.4835, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.45584389458861396, | |
| "grad_norm": 4.381591989574539, | |
| "learning_rate": 3.3118360595975947e-06, | |
| "loss": 0.5592, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.45624622812311405, | |
| "grad_norm": 4.022213508423109, | |
| "learning_rate": 3.3085145909102885e-06, | |
| "loss": 0.5304, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.45664856165761414, | |
| "grad_norm": 3.834367329162151, | |
| "learning_rate": 3.305191527609e-06, | |
| "loss": 0.4678, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.4570508951921143, | |
| "grad_norm": 4.448203020700801, | |
| "learning_rate": 3.3018668762477294e-06, | |
| "loss": 0.5172, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.45745322872661437, | |
| "grad_norm": 4.128965375687235, | |
| "learning_rate": 3.2985406433836055e-06, | |
| "loss": 0.4742, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.45785556226111446, | |
| "grad_norm": 4.129504251009841, | |
| "learning_rate": 3.295212835576878e-06, | |
| "loss": 0.5873, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.45825789579561454, | |
| "grad_norm": 4.422224105540513, | |
| "learning_rate": 3.2918834593909028e-06, | |
| "loss": 0.6049, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.4586602293301147, | |
| "grad_norm": 4.434937349809798, | |
| "learning_rate": 3.2885525213921286e-06, | |
| "loss": 0.5708, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.4590625628646148, | |
| "grad_norm": 3.668465429492197, | |
| "learning_rate": 3.2852200281500855e-06, | |
| "loss": 0.4559, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.45946489639911486, | |
| "grad_norm": 4.232532968956556, | |
| "learning_rate": 3.2818859862373696e-06, | |
| "loss": 0.5721, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.45986722993361495, | |
| "grad_norm": 4.7338258325238085, | |
| "learning_rate": 3.2785504022296316e-06, | |
| "loss": 0.6157, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 0.4602695634681151, | |
| "grad_norm": 3.7791426452155723, | |
| "learning_rate": 3.2752132827055646e-06, | |
| "loss": 0.5458, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.4606718970026152, | |
| "grad_norm": 4.1019637445172314, | |
| "learning_rate": 3.2718746342468894e-06, | |
| "loss": 0.5972, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.46107423053711527, | |
| "grad_norm": 5.277495798462077, | |
| "learning_rate": 3.268534463438342e-06, | |
| "loss": 0.8266, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.46147656407161536, | |
| "grad_norm": 4.184453273932426, | |
| "learning_rate": 3.2651927768676626e-06, | |
| "loss": 0.4562, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 0.46187889760611545, | |
| "grad_norm": 4.45907273350243, | |
| "learning_rate": 3.261849581125579e-06, | |
| "loss": 0.6532, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.4622812311406156, | |
| "grad_norm": 3.965076229787784, | |
| "learning_rate": 3.258504882805796e-06, | |
| "loss": 0.5247, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 0.4626835646751157, | |
| "grad_norm": 4.461055711184354, | |
| "learning_rate": 3.2551586885049814e-06, | |
| "loss": 0.4588, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.46308589820961576, | |
| "grad_norm": 4.3124706302870255, | |
| "learning_rate": 3.2518110048227553e-06, | |
| "loss": 0.7004, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 0.46348823174411585, | |
| "grad_norm": 4.326890541788323, | |
| "learning_rate": 3.2484618383616735e-06, | |
| "loss": 0.5989, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.463890565278616, | |
| "grad_norm": 4.323105374837098, | |
| "learning_rate": 3.2451111957272173e-06, | |
| "loss": 0.5451, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 0.4642928988131161, | |
| "grad_norm": 4.134824677878268, | |
| "learning_rate": 3.241759083527779e-06, | |
| "loss": 0.5589, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.46469523234761617, | |
| "grad_norm": 4.0981640141993365, | |
| "learning_rate": 3.238405508374649e-06, | |
| "loss": 0.5771, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.46509756588211626, | |
| "grad_norm": 3.7455178072585644, | |
| "learning_rate": 3.2350504768820036e-06, | |
| "loss": 0.5125, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.46549989941661635, | |
| "grad_norm": 3.72359460364635, | |
| "learning_rate": 3.23169399566689e-06, | |
| "loss": 0.4213, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 0.4659022329511165, | |
| "grad_norm": 3.509972924609966, | |
| "learning_rate": 3.228336071349218e-06, | |
| "loss": 0.4757, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.4663045664856166, | |
| "grad_norm": 4.028400275439829, | |
| "learning_rate": 3.2249767105517395e-06, | |
| "loss": 0.5022, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 0.46670690002011667, | |
| "grad_norm": 4.150972255373417, | |
| "learning_rate": 3.221615919900042e-06, | |
| "loss": 0.6257, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.46710923355461675, | |
| "grad_norm": 4.494189407683215, | |
| "learning_rate": 3.218253706022533e-06, | |
| "loss": 0.5914, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 0.4675115670891169, | |
| "grad_norm": 3.558180537366331, | |
| "learning_rate": 3.2148900755504253e-06, | |
| "loss": 0.5102, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.467913900623617, | |
| "grad_norm": 3.438406963531958, | |
| "learning_rate": 3.2115250351177274e-06, | |
| "loss": 0.515, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 0.4683162341581171, | |
| "grad_norm": 4.323925078750027, | |
| "learning_rate": 3.208158591361228e-06, | |
| "loss": 0.5109, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.46871856769261716, | |
| "grad_norm": 4.1534707963201285, | |
| "learning_rate": 3.204790750920484e-06, | |
| "loss": 0.5736, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.4691209012271173, | |
| "grad_norm": 3.6871720181954406, | |
| "learning_rate": 3.2014215204378064e-06, | |
| "loss": 0.5765, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.4695232347616174, | |
| "grad_norm": 3.9220478852176264, | |
| "learning_rate": 3.1980509065582476e-06, | |
| "loss": 0.5545, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 0.4699255682961175, | |
| "grad_norm": 4.603512689826395, | |
| "learning_rate": 3.194678915929589e-06, | |
| "loss": 0.5587, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.47032790183061757, | |
| "grad_norm": 4.0356910482849075, | |
| "learning_rate": 3.1913055552023263e-06, | |
| "loss": 0.4642, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.47073023536511766, | |
| "grad_norm": 4.251943286279792, | |
| "learning_rate": 3.1879308310296586e-06, | |
| "loss": 0.5496, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.4711325688996178, | |
| "grad_norm": 4.189042957707438, | |
| "learning_rate": 3.1845547500674737e-06, | |
| "loss": 0.5646, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 0.4715349024341179, | |
| "grad_norm": 3.3326084801089353, | |
| "learning_rate": 3.1811773189743355e-06, | |
| "loss": 0.4837, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.471937235968618, | |
| "grad_norm": 4.523273563587766, | |
| "learning_rate": 3.17779854441147e-06, | |
| "loss": 0.6807, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 0.47233956950311806, | |
| "grad_norm": 3.7232216855517315, | |
| "learning_rate": 3.1744184330427543e-06, | |
| "loss": 0.4492, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.4727419030376182, | |
| "grad_norm": 3.8162014027986437, | |
| "learning_rate": 3.1710369915347e-06, | |
| "loss": 0.5626, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.4731442365721183, | |
| "grad_norm": 4.575379469711398, | |
| "learning_rate": 3.1676542265564443e-06, | |
| "loss": 0.6358, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.4735465701066184, | |
| "grad_norm": 4.563047737373793, | |
| "learning_rate": 3.1642701447797324e-06, | |
| "loss": 0.6282, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 0.47394890364111847, | |
| "grad_norm": 4.254463762469985, | |
| "learning_rate": 3.1608847528789096e-06, | |
| "loss": 0.4974, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.4743512371756186, | |
| "grad_norm": 4.290561801295637, | |
| "learning_rate": 3.1574980575309015e-06, | |
| "loss": 0.6116, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 0.4747535707101187, | |
| "grad_norm": 4.276362615412023, | |
| "learning_rate": 3.154110065415208e-06, | |
| "loss": 0.5274, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.4751559042446188, | |
| "grad_norm": 3.8389354369931374, | |
| "learning_rate": 3.150720783213883e-06, | |
| "loss": 0.5338, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 0.4755582377791189, | |
| "grad_norm": 4.448041403696648, | |
| "learning_rate": 3.1473302176115283e-06, | |
| "loss": 0.5265, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.47596057131361896, | |
| "grad_norm": 4.46050378901124, | |
| "learning_rate": 3.1439383752952753e-06, | |
| "loss": 0.6299, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 0.4763629048481191, | |
| "grad_norm": 4.111077775261664, | |
| "learning_rate": 3.140545262954772e-06, | |
| "loss": 0.5723, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.4767652383826192, | |
| "grad_norm": 3.678492409328739, | |
| "learning_rate": 3.137150887282174e-06, | |
| "loss": 0.4178, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.4771675719171193, | |
| "grad_norm": 3.661689522078919, | |
| "learning_rate": 3.1337552549721262e-06, | |
| "loss": 0.4658, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.47756990545161937, | |
| "grad_norm": 4.250351645694971, | |
| "learning_rate": 3.130358372721754e-06, | |
| "loss": 0.5614, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 0.4779722389861195, | |
| "grad_norm": 3.599583256588181, | |
| "learning_rate": 3.126960247230646e-06, | |
| "loss": 0.4569, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.4783745725206196, | |
| "grad_norm": 3.821896657809243, | |
| "learning_rate": 3.1235608852008446e-06, | |
| "loss": 0.4834, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 0.4787769060551197, | |
| "grad_norm": 3.9335780159969906, | |
| "learning_rate": 3.1201602933368308e-06, | |
| "loss": 0.5152, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.4791792395896198, | |
| "grad_norm": 4.770540317196732, | |
| "learning_rate": 3.116758478345509e-06, | |
| "loss": 0.5948, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 0.4795815731241199, | |
| "grad_norm": 3.998481220511283, | |
| "learning_rate": 3.1133554469361976e-06, | |
| "loss": 0.4881, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.47998390665862, | |
| "grad_norm": 4.208648289802026, | |
| "learning_rate": 3.109951205820615e-06, | |
| "loss": 0.4933, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 0.4803862401931201, | |
| "grad_norm": 3.752607346638518, | |
| "learning_rate": 3.1065457617128635e-06, | |
| "loss": 0.547, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.4807885737276202, | |
| "grad_norm": 4.151251812002077, | |
| "learning_rate": 3.1031391213294203e-06, | |
| "loss": 0.5015, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.48119090726212027, | |
| "grad_norm": 4.7037289072786095, | |
| "learning_rate": 3.0997312913891196e-06, | |
| "loss": 0.6795, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.4815932407966204, | |
| "grad_norm": 4.364147422750159, | |
| "learning_rate": 3.0963222786131435e-06, | |
| "loss": 0.5422, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 0.4819955743311205, | |
| "grad_norm": 3.301076213262608, | |
| "learning_rate": 3.0929120897250066e-06, | |
| "loss": 0.4646, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.4823979078656206, | |
| "grad_norm": 3.803770968997955, | |
| "learning_rate": 3.0895007314505415e-06, | |
| "loss": 0.4579, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 0.4828002414001207, | |
| "grad_norm": 3.6319529462360816, | |
| "learning_rate": 3.0860882105178897e-06, | |
| "loss": 0.5152, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.4832025749346208, | |
| "grad_norm": 3.7950010562493453, | |
| "learning_rate": 3.082674533657484e-06, | |
| "loss": 0.588, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 0.4836049084691209, | |
| "grad_norm": 3.9284108228666796, | |
| "learning_rate": 3.079259707602038e-06, | |
| "loss": 0.5565, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.484007242003621, | |
| "grad_norm": 4.764319789283065, | |
| "learning_rate": 3.0758437390865314e-06, | |
| "loss": 0.6115, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 0.4844095755381211, | |
| "grad_norm": 3.781006793375389, | |
| "learning_rate": 3.0724266348481966e-06, | |
| "loss": 0.5239, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.4848119090726212, | |
| "grad_norm": 4.259150542468107, | |
| "learning_rate": 3.069008401626507e-06, | |
| "loss": 0.5505, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.4852142426071213, | |
| "grad_norm": 4.222901377467788, | |
| "learning_rate": 3.065589046163162e-06, | |
| "loss": 0.5425, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.4856165761416214, | |
| "grad_norm": 3.783266465549031, | |
| "learning_rate": 3.062168575202075e-06, | |
| "loss": 0.542, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 0.4860189096761215, | |
| "grad_norm": 4.428497795054238, | |
| "learning_rate": 3.0587469954893584e-06, | |
| "loss": 0.6567, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.4864212432106216, | |
| "grad_norm": 3.691837740063112, | |
| "learning_rate": 3.0553243137733125e-06, | |
| "loss": 0.4558, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 0.4868235767451217, | |
| "grad_norm": 4.073296743584824, | |
| "learning_rate": 3.051900536804411e-06, | |
| "loss": 0.5664, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.4872259102796218, | |
| "grad_norm": 3.7192683213034234, | |
| "learning_rate": 3.0484756713352858e-06, | |
| "loss": 0.494, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 0.4876282438141219, | |
| "grad_norm": 3.6377595381026557, | |
| "learning_rate": 3.045049724120718e-06, | |
| "loss": 0.4885, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.488030577348622, | |
| "grad_norm": 4.338683717827157, | |
| "learning_rate": 3.0416227019176208e-06, | |
| "loss": 0.6165, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 0.48843291088312213, | |
| "grad_norm": 4.374868800699678, | |
| "learning_rate": 3.0381946114850293e-06, | |
| "loss": 0.5755, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.4888352444176222, | |
| "grad_norm": 4.54077234147531, | |
| "learning_rate": 3.034765459584083e-06, | |
| "loss": 0.5954, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.4892375779521223, | |
| "grad_norm": 4.166268412885333, | |
| "learning_rate": 3.031335252978017e-06, | |
| "loss": 0.6657, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.4896399114866224, | |
| "grad_norm": 3.675310094808539, | |
| "learning_rate": 3.027903998432145e-06, | |
| "loss": 0.4676, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 0.49004224502112254, | |
| "grad_norm": 4.091571307318168, | |
| "learning_rate": 3.0244717027138494e-06, | |
| "loss": 0.579, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.4904445785556226, | |
| "grad_norm": 4.2620047470347595, | |
| "learning_rate": 3.021038372592563e-06, | |
| "loss": 0.577, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 0.4908469120901227, | |
| "grad_norm": 3.687909147915886, | |
| "learning_rate": 3.017604014839762e-06, | |
| "loss": 0.557, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.4912492456246228, | |
| "grad_norm": 4.003092692480048, | |
| "learning_rate": 3.014168636228948e-06, | |
| "loss": 0.5517, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 0.4916515791591229, | |
| "grad_norm": 4.349298145062102, | |
| "learning_rate": 3.010732243535636e-06, | |
| "loss": 0.5991, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.49205391269362303, | |
| "grad_norm": 3.9645619754575963, | |
| "learning_rate": 3.0072948435373408e-06, | |
| "loss": 0.5145, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 0.4924562462281231, | |
| "grad_norm": 4.216377733109406, | |
| "learning_rate": 3.003856443013564e-06, | |
| "loss": 0.6017, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.4928585797626232, | |
| "grad_norm": 3.9206543304610584, | |
| "learning_rate": 3.0004170487457813e-06, | |
| "loss": 0.5468, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.4932609132971233, | |
| "grad_norm": 4.196664240626691, | |
| "learning_rate": 2.996976667517427e-06, | |
| "loss": 0.5191, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.49366324683162344, | |
| "grad_norm": 3.7775234111981493, | |
| "learning_rate": 2.9935353061138833e-06, | |
| "loss": 0.5094, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 0.4940655803661235, | |
| "grad_norm": 4.412452469732304, | |
| "learning_rate": 2.990092971322464e-06, | |
| "loss": 0.566, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.4944679139006236, | |
| "grad_norm": 3.967665512410053, | |
| "learning_rate": 2.986649669932405e-06, | |
| "loss": 0.5005, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 0.4948702474351237, | |
| "grad_norm": 3.8061725506913353, | |
| "learning_rate": 2.983205408734846e-06, | |
| "loss": 0.5214, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.49527258096962384, | |
| "grad_norm": 3.615684737615807, | |
| "learning_rate": 2.9797601945228214e-06, | |
| "loss": 0.4647, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 0.49567491450412393, | |
| "grad_norm": 4.27866020604673, | |
| "learning_rate": 2.9763140340912463e-06, | |
| "loss": 0.6409, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.496077248038624, | |
| "grad_norm": 3.596076423790147, | |
| "learning_rate": 2.972866934236898e-06, | |
| "loss": 0.412, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 0.4964795815731241, | |
| "grad_norm": 4.0355620490752955, | |
| "learning_rate": 2.9694189017584097e-06, | |
| "loss": 0.5596, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.4968819151076242, | |
| "grad_norm": 4.148051125186017, | |
| "learning_rate": 2.965969943456254e-06, | |
| "loss": 0.5191, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.49728424864212434, | |
| "grad_norm": 4.2692044767988, | |
| "learning_rate": 2.962520066132728e-06, | |
| "loss": 0.5066, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.4976865821766244, | |
| "grad_norm": 4.28381423937242, | |
| "learning_rate": 2.9590692765919436e-06, | |
| "loss": 0.598, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 0.4980889157111245, | |
| "grad_norm": 4.175358078657294, | |
| "learning_rate": 2.955617581639809e-06, | |
| "loss": 0.5953, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.4984912492456246, | |
| "grad_norm": 4.5191335184254955, | |
| "learning_rate": 2.952164988084021e-06, | |
| "loss": 0.6236, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 0.49889358278012474, | |
| "grad_norm": 4.126996114146587, | |
| "learning_rate": 2.9487115027340458e-06, | |
| "loss": 0.5455, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.49929591631462483, | |
| "grad_norm": 3.978032878939077, | |
| "learning_rate": 2.9452571324011103e-06, | |
| "loss": 0.5658, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 0.4996982498491249, | |
| "grad_norm": 4.494778767380183, | |
| "learning_rate": 2.941801883898187e-06, | |
| "loss": 0.6639, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.5001005833836251, | |
| "grad_norm": 3.7291179386624376, | |
| "learning_rate": 2.9383457640399794e-06, | |
| "loss": 0.4724, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 0.5005029169181251, | |
| "grad_norm": 4.0158904670389175, | |
| "learning_rate": 2.9348887796429104e-06, | |
| "loss": 0.5611, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.5009052504526252, | |
| "grad_norm": 3.93870398764487, | |
| "learning_rate": 2.9314309375251065e-06, | |
| "loss": 0.5596, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.5013075839871253, | |
| "grad_norm": 4.111802544399985, | |
| "learning_rate": 2.9279722445063883e-06, | |
| "loss": 0.5872, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.5017099175216254, | |
| "grad_norm": 3.948101448350297, | |
| "learning_rate": 2.9245127074082535e-06, | |
| "loss": 0.5131, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 0.5021122510561256, | |
| "grad_norm": 4.345129844652909, | |
| "learning_rate": 2.9210523330538625e-06, | |
| "loss": 0.6389, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.5025145845906256, | |
| "grad_norm": 3.6367326824064112, | |
| "learning_rate": 2.9175911282680307e-06, | |
| "loss": 0.4518, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 0.5029169181251257, | |
| "grad_norm": 4.027476673290246, | |
| "learning_rate": 2.914129099877208e-06, | |
| "loss": 0.5234, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.5033192516596259, | |
| "grad_norm": 3.450546244138055, | |
| "learning_rate": 2.9106662547094724e-06, | |
| "loss": 0.3807, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 0.5037215851941259, | |
| "grad_norm": 3.696528336339893, | |
| "learning_rate": 2.907202599594508e-06, | |
| "loss": 0.3884, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.504123918728626, | |
| "grad_norm": 4.252344685184106, | |
| "learning_rate": 2.9037381413636e-06, | |
| "loss": 0.6935, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 0.5045262522631261, | |
| "grad_norm": 4.336362311318111, | |
| "learning_rate": 2.900272886849616e-06, | |
| "loss": 0.5331, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.5049285857976262, | |
| "grad_norm": 3.6842567382976688, | |
| "learning_rate": 2.8968068428869955e-06, | |
| "loss": 0.5545, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.5053309193321264, | |
| "grad_norm": 3.699409904804457, | |
| "learning_rate": 2.8933400163117337e-06, | |
| "loss": 0.3754, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.5057332528666264, | |
| "grad_norm": 4.205702275086939, | |
| "learning_rate": 2.889872413961369e-06, | |
| "loss": 0.6633, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 0.5061355864011265, | |
| "grad_norm": 3.6639853190462626, | |
| "learning_rate": 2.886404042674972e-06, | |
| "loss": 0.3892, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.5065379199356266, | |
| "grad_norm": 3.5640332071999765, | |
| "learning_rate": 2.882934909293127e-06, | |
| "loss": 0.4084, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 0.5069402534701267, | |
| "grad_norm": 4.0471975082058025, | |
| "learning_rate": 2.8794650206579234e-06, | |
| "loss": 0.5812, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.5073425870046269, | |
| "grad_norm": 4.0654643808596616, | |
| "learning_rate": 2.875994383612939e-06, | |
| "loss": 0.568, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 0.5077449205391269, | |
| "grad_norm": 4.330240433028303, | |
| "learning_rate": 2.872523005003229e-06, | |
| "loss": 0.5165, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.508147254073627, | |
| "grad_norm": 4.015050113056623, | |
| "learning_rate": 2.869050891675309e-06, | |
| "loss": 0.5002, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 0.5085495876081272, | |
| "grad_norm": 4.393983632365189, | |
| "learning_rate": 2.8655780504771476e-06, | |
| "loss": 0.573, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.5089519211426272, | |
| "grad_norm": 3.193554396038774, | |
| "learning_rate": 2.8621044882581433e-06, | |
| "loss": 0.3709, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.5093542546771274, | |
| "grad_norm": 3.3841882285013813, | |
| "learning_rate": 2.8586302118691223e-06, | |
| "loss": 0.4387, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.5097565882116274, | |
| "grad_norm": 3.9970258932238742, | |
| "learning_rate": 2.8551552281623157e-06, | |
| "loss": 0.5225, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 0.5101589217461275, | |
| "grad_norm": 4.513606814044003, | |
| "learning_rate": 2.85167954399135e-06, | |
| "loss": 0.5528, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.5105612552806277, | |
| "grad_norm": 4.684727392976221, | |
| "learning_rate": 2.848203166211235e-06, | |
| "loss": 0.6793, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 0.5109635888151277, | |
| "grad_norm": 4.222583621631339, | |
| "learning_rate": 2.8447261016783474e-06, | |
| "loss": 0.6053, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.5113659223496279, | |
| "grad_norm": 3.8724976223748744, | |
| "learning_rate": 2.841248357250418e-06, | |
| "loss": 0.4797, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 0.5117682558841279, | |
| "grad_norm": 3.5925982833624652, | |
| "learning_rate": 2.8377699397865184e-06, | |
| "loss": 0.6524, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.512170589418628, | |
| "grad_norm": 4.4527497576787285, | |
| "learning_rate": 2.8342908561470496e-06, | |
| "loss": 0.6426, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 0.5125729229531282, | |
| "grad_norm": 3.758694762638942, | |
| "learning_rate": 2.8308111131937237e-06, | |
| "loss": 0.4863, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.5129752564876282, | |
| "grad_norm": 3.432356742958401, | |
| "learning_rate": 2.827330717789555e-06, | |
| "loss": 0.5223, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.5133775900221283, | |
| "grad_norm": 4.33004996309828, | |
| "learning_rate": 2.8238496767988433e-06, | |
| "loss": 0.6004, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.5137799235566285, | |
| "grad_norm": 4.200608717420535, | |
| "learning_rate": 2.820367997087164e-06, | |
| "loss": 0.6228, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 0.5141822570911285, | |
| "grad_norm": 4.0453448815375035, | |
| "learning_rate": 2.8168856855213493e-06, | |
| "loss": 0.6242, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.5145845906256287, | |
| "grad_norm": 4.227997150733838, | |
| "learning_rate": 2.81340274896948e-06, | |
| "loss": 0.6175, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 0.5149869241601287, | |
| "grad_norm": 3.68997651052952, | |
| "learning_rate": 2.8099191943008673e-06, | |
| "loss": 0.3891, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.5153892576946288, | |
| "grad_norm": 3.983876769382806, | |
| "learning_rate": 2.806435028386044e-06, | |
| "loss": 0.4676, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 0.515791591229129, | |
| "grad_norm": 3.7821657559918362, | |
| "learning_rate": 2.8029502580967467e-06, | |
| "loss": 0.5712, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.516193924763629, | |
| "grad_norm": 4.613891160258615, | |
| "learning_rate": 2.7994648903059034e-06, | |
| "loss": 0.6892, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 0.5165962582981292, | |
| "grad_norm": 4.260048115855587, | |
| "learning_rate": 2.7959789318876235e-06, | |
| "loss": 0.587, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.5169985918326292, | |
| "grad_norm": 3.2651956443139905, | |
| "learning_rate": 2.792492389717178e-06, | |
| "loss": 0.4202, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.5174009253671293, | |
| "grad_norm": 3.8441567338972136, | |
| "learning_rate": 2.7890052706709913e-06, | |
| "loss": 0.5274, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.5178032589016295, | |
| "grad_norm": 4.244436141220556, | |
| "learning_rate": 2.785517581626624e-06, | |
| "loss": 0.5557, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 0.5182055924361295, | |
| "grad_norm": 3.777185766227343, | |
| "learning_rate": 2.782029329462763e-06, | |
| "loss": 0.5508, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.5186079259706297, | |
| "grad_norm": 4.024624316501224, | |
| "learning_rate": 2.778540521059204e-06, | |
| "loss": 0.4091, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 0.5190102595051298, | |
| "grad_norm": 3.429656579938711, | |
| "learning_rate": 2.775051163296839e-06, | |
| "loss": 0.373, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.5194125930396298, | |
| "grad_norm": 4.115516308278128, | |
| "learning_rate": 2.771561263057647e-06, | |
| "loss": 0.6296, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 0.51981492657413, | |
| "grad_norm": 4.5340536303593, | |
| "learning_rate": 2.7680708272246738e-06, | |
| "loss": 0.5478, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.52021726010863, | |
| "grad_norm": 4.1573055283514675, | |
| "learning_rate": 2.7645798626820226e-06, | |
| "loss": 0.5928, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 0.5206195936431302, | |
| "grad_norm": 3.5540381627057926, | |
| "learning_rate": 2.7610883763148382e-06, | |
| "loss": 0.4204, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.5210219271776303, | |
| "grad_norm": 3.7082383118644406, | |
| "learning_rate": 2.7575963750092976e-06, | |
| "loss": 0.5897, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.5214242607121303, | |
| "grad_norm": 3.670768517938044, | |
| "learning_rate": 2.7541038656525906e-06, | |
| "loss": 0.4524, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.5218265942466305, | |
| "grad_norm": 3.9599618494551336, | |
| "learning_rate": 2.7506108551329084e-06, | |
| "loss": 0.5334, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 0.5222289277811305, | |
| "grad_norm": 3.8332760152691425, | |
| "learning_rate": 2.747117350339434e-06, | |
| "loss": 0.5027, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.5226312613156306, | |
| "grad_norm": 3.5387447785248463, | |
| "learning_rate": 2.743623358162322e-06, | |
| "loss": 0.4453, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 0.5230335948501308, | |
| "grad_norm": 3.665534245478194, | |
| "learning_rate": 2.74012888549269e-06, | |
| "loss": 0.4541, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.5234359283846308, | |
| "grad_norm": 4.118363799618076, | |
| "learning_rate": 2.736633939222604e-06, | |
| "loss": 0.4863, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 0.523838261919131, | |
| "grad_norm": 4.07159865432201, | |
| "learning_rate": 2.733138526245061e-06, | |
| "loss": 0.5836, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.5242405954536311, | |
| "grad_norm": 4.9413620392110955, | |
| "learning_rate": 2.729642653453981e-06, | |
| "loss": 0.6331, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 0.5246429289881311, | |
| "grad_norm": 4.349366316003809, | |
| "learning_rate": 2.7261463277441913e-06, | |
| "loss": 0.6584, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.5250452625226313, | |
| "grad_norm": 4.013818084372327, | |
| "learning_rate": 2.7226495560114097e-06, | |
| "loss": 0.6444, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.5254475960571313, | |
| "grad_norm": 3.9731448823583158, | |
| "learning_rate": 2.719152345152237e-06, | |
| "loss": 0.4717, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.5258499295916315, | |
| "grad_norm": 3.836192951376196, | |
| "learning_rate": 2.7156547020641377e-06, | |
| "loss": 0.4813, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 0.5262522631261316, | |
| "grad_norm": 4.526603353793837, | |
| "learning_rate": 2.7121566336454295e-06, | |
| "loss": 0.6223, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.5266545966606316, | |
| "grad_norm": 4.542236066581728, | |
| "learning_rate": 2.70865814679527e-06, | |
| "loss": 0.541, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 0.5270569301951318, | |
| "grad_norm": 4.477320709723044, | |
| "learning_rate": 2.70515924841364e-06, | |
| "loss": 0.4898, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.5274592637296318, | |
| "grad_norm": 4.188969544607867, | |
| "learning_rate": 2.7016599454013324e-06, | |
| "loss": 0.478, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 0.527861597264132, | |
| "grad_norm": 3.57921084312417, | |
| "learning_rate": 2.6981602446599408e-06, | |
| "loss": 0.4797, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.5282639307986321, | |
| "grad_norm": 3.4828294543575784, | |
| "learning_rate": 2.69466015309184e-06, | |
| "loss": 0.4464, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 0.5286662643331321, | |
| "grad_norm": 3.6542821945407002, | |
| "learning_rate": 2.691159677600177e-06, | |
| "loss": 0.5241, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.5290685978676323, | |
| "grad_norm": 4.138582178147402, | |
| "learning_rate": 2.6876588250888556e-06, | |
| "loss": 0.4976, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.5294709314021324, | |
| "grad_norm": 4.156252344667509, | |
| "learning_rate": 2.6841576024625247e-06, | |
| "loss": 0.5346, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.5298732649366324, | |
| "grad_norm": 3.9783539180873118, | |
| "learning_rate": 2.680656016626561e-06, | |
| "loss": 0.5054, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 0.5302755984711326, | |
| "grad_norm": 4.167749289113631, | |
| "learning_rate": 2.677154074487057e-06, | |
| "loss": 0.5308, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.5306779320056326, | |
| "grad_norm": 3.6243864679157536, | |
| "learning_rate": 2.6736517829508124e-06, | |
| "loss": 0.4969, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 0.5310802655401328, | |
| "grad_norm": 4.080652153118834, | |
| "learning_rate": 2.6701491489253107e-06, | |
| "loss": 0.5166, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.5314825990746329, | |
| "grad_norm": 4.386897362383017, | |
| "learning_rate": 2.6666461793187143e-06, | |
| "loss": 0.4953, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 0.5318849326091329, | |
| "grad_norm": 4.286991486124671, | |
| "learning_rate": 2.6631428810398453e-06, | |
| "loss": 0.4804, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.5322872661436331, | |
| "grad_norm": 3.996180824655324, | |
| "learning_rate": 2.659639260998176e-06, | |
| "loss": 0.5182, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 0.5326895996781331, | |
| "grad_norm": 4.305112080018121, | |
| "learning_rate": 2.6561353261038127e-06, | |
| "loss": 0.6548, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.5330919332126333, | |
| "grad_norm": 3.6190226327616557, | |
| "learning_rate": 2.6526310832674807e-06, | |
| "loss": 0.5383, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.5334942667471334, | |
| "grad_norm": 4.316996697367821, | |
| "learning_rate": 2.6491265394005157e-06, | |
| "loss": 0.5516, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.5338966002816334, | |
| "grad_norm": 4.0648368660213325, | |
| "learning_rate": 2.645621701414845e-06, | |
| "loss": 0.6482, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 0.5342989338161336, | |
| "grad_norm": 3.7739876249793047, | |
| "learning_rate": 2.6421165762229777e-06, | |
| "loss": 0.4783, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.5347012673506337, | |
| "grad_norm": 3.789987275308021, | |
| "learning_rate": 2.6386111707379873e-06, | |
| "loss": 0.4525, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 0.5351036008851338, | |
| "grad_norm": 3.630575939542573, | |
| "learning_rate": 2.635105491873502e-06, | |
| "loss": 0.5329, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.5355059344196339, | |
| "grad_norm": 3.961557081061209, | |
| "learning_rate": 2.6315995465436878e-06, | |
| "loss": 0.5432, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 0.5359082679541339, | |
| "grad_norm": 3.8780055120857204, | |
| "learning_rate": 2.628093341663236e-06, | |
| "loss": 0.454, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.5363106014886341, | |
| "grad_norm": 4.031126634163913, | |
| "learning_rate": 2.6245868841473525e-06, | |
| "loss": 0.5452, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 0.5367129350231342, | |
| "grad_norm": 3.3788818500452997, | |
| "learning_rate": 2.6210801809117377e-06, | |
| "loss": 0.4364, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.5371152685576343, | |
| "grad_norm": 4.566291916332284, | |
| "learning_rate": 2.61757323887258e-06, | |
| "loss": 0.6427, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.5375176020921344, | |
| "grad_norm": 4.46318346856033, | |
| "learning_rate": 2.614066064946536e-06, | |
| "loss": 0.6142, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.5379199356266344, | |
| "grad_norm": 3.8724650951472377, | |
| "learning_rate": 2.6105586660507214e-06, | |
| "loss": 0.4865, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 0.5383222691611346, | |
| "grad_norm": 4.107266394665508, | |
| "learning_rate": 2.607051049102696e-06, | |
| "loss": 0.5618, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.5387246026956347, | |
| "grad_norm": 4.131578269950126, | |
| "learning_rate": 2.6035432210204475e-06, | |
| "loss": 0.6571, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 0.5391269362301347, | |
| "grad_norm": 4.135237490425349, | |
| "learning_rate": 2.6000351887223817e-06, | |
| "loss": 0.506, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.5395292697646349, | |
| "grad_norm": 4.2799950491642695, | |
| "learning_rate": 2.5965269591273075e-06, | |
| "loss": 0.5054, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 0.539931603299135, | |
| "grad_norm": 4.111277509443795, | |
| "learning_rate": 2.59301853915442e-06, | |
| "loss": 0.5896, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.5403339368336351, | |
| "grad_norm": 3.8507472387773416, | |
| "learning_rate": 2.5895099357232946e-06, | |
| "loss": 0.4784, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 0.5407362703681352, | |
| "grad_norm": 4.553167071691355, | |
| "learning_rate": 2.5860011557538644e-06, | |
| "loss": 0.5889, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.5411386039026352, | |
| "grad_norm": 4.8377815361815175, | |
| "learning_rate": 2.5824922061664125e-06, | |
| "loss": 0.7153, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.5415409374371354, | |
| "grad_norm": 4.026163246451428, | |
| "learning_rate": 2.578983093881557e-06, | |
| "loss": 0.5578, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.5419432709716355, | |
| "grad_norm": 4.396870764657521, | |
| "learning_rate": 2.5754738258202345e-06, | |
| "loss": 0.7799, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 0.5423456045061356, | |
| "grad_norm": 4.349518153263542, | |
| "learning_rate": 2.5719644089036916e-06, | |
| "loss": 0.5919, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.5427479380406357, | |
| "grad_norm": 4.355976508524607, | |
| "learning_rate": 2.568454850053467e-06, | |
| "loss": 0.5903, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 0.5431502715751357, | |
| "grad_norm": 3.951612074252993, | |
| "learning_rate": 2.5649451561913796e-06, | |
| "loss": 0.5038, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.5435526051096359, | |
| "grad_norm": 4.030293462511148, | |
| "learning_rate": 2.561435334239515e-06, | |
| "loss": 0.5308, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 0.543954938644136, | |
| "grad_norm": 4.530833691231785, | |
| "learning_rate": 2.55792539112021e-06, | |
| "loss": 0.5977, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.544357272178636, | |
| "grad_norm": 4.1989735383766185, | |
| "learning_rate": 2.5544153337560422e-06, | |
| "loss": 0.5686, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 0.5447596057131362, | |
| "grad_norm": 4.32105387076282, | |
| "learning_rate": 2.5509051690698137e-06, | |
| "loss": 0.6257, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.5451619392476363, | |
| "grad_norm": 3.34919768311592, | |
| "learning_rate": 2.5473949039845388e-06, | |
| "loss": 0.4131, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.5455642727821364, | |
| "grad_norm": 4.128988896294745, | |
| "learning_rate": 2.5438845454234284e-06, | |
| "loss": 0.6007, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.5459666063166365, | |
| "grad_norm": 4.122788652813818, | |
| "learning_rate": 2.5403741003098793e-06, | |
| "loss": 0.5223, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 0.5463689398511365, | |
| "grad_norm": 4.420118909526267, | |
| "learning_rate": 2.5368635755674586e-06, | |
| "loss": 0.5506, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.5467712733856367, | |
| "grad_norm": 4.2195230085645345, | |
| "learning_rate": 2.5333529781198895e-06, | |
| "loss": 0.5989, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 0.5471736069201368, | |
| "grad_norm": 4.20795164503954, | |
| "learning_rate": 2.5298423148910396e-06, | |
| "loss": 0.4462, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.5475759404546369, | |
| "grad_norm": 4.010223353178494, | |
| "learning_rate": 2.5263315928049063e-06, | |
| "loss": 0.5192, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 0.547978273989137, | |
| "grad_norm": 4.816701841105469, | |
| "learning_rate": 2.5228208187856024e-06, | |
| "loss": 0.6599, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.548380607523637, | |
| "grad_norm": 4.023413458632839, | |
| "learning_rate": 2.5193099997573437e-06, | |
| "loss": 0.6185, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 0.5487829410581372, | |
| "grad_norm": 3.6516400727809115, | |
| "learning_rate": 2.5157991426444343e-06, | |
| "loss": 0.5859, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.5491852745926373, | |
| "grad_norm": 4.119751084462459, | |
| "learning_rate": 2.5122882543712546e-06, | |
| "loss": 0.5255, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.5495876081271374, | |
| "grad_norm": 3.957835467170672, | |
| "learning_rate": 2.5087773418622447e-06, | |
| "loss": 0.4659, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.5499899416616375, | |
| "grad_norm": 3.500258182406765, | |
| "learning_rate": 2.505266412041893e-06, | |
| "loss": 0.4385, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 0.5503922751961376, | |
| "grad_norm": 4.041408070624076, | |
| "learning_rate": 2.5017554718347233e-06, | |
| "loss": 0.5367, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.5507946087306377, | |
| "grad_norm": 4.091765389792879, | |
| "learning_rate": 2.4982445281652775e-06, | |
| "loss": 0.5744, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 0.5511969422651378, | |
| "grad_norm": 4.469717920616285, | |
| "learning_rate": 2.4947335879581073e-06, | |
| "loss": 0.5818, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.5515992757996379, | |
| "grad_norm": 3.9779088164073833, | |
| "learning_rate": 2.4912226581377566e-06, | |
| "loss": 0.5893, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 0.552001609334138, | |
| "grad_norm": 3.627034776419984, | |
| "learning_rate": 2.4877117456287463e-06, | |
| "loss": 0.4902, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.5524039428686381, | |
| "grad_norm": 3.8562269767576405, | |
| "learning_rate": 2.484200857355566e-06, | |
| "loss": 0.6268, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 0.5528062764031382, | |
| "grad_norm": 4.192164362531014, | |
| "learning_rate": 2.4806900002426567e-06, | |
| "loss": 0.6121, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.5532086099376383, | |
| "grad_norm": 3.916801705190097, | |
| "learning_rate": 2.477179181214398e-06, | |
| "loss": 0.4926, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.5536109434721384, | |
| "grad_norm": 3.693356946438789, | |
| "learning_rate": 2.473668407195095e-06, | |
| "loss": 0.5189, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.5540132770066385, | |
| "grad_norm": 4.229576715855437, | |
| "learning_rate": 2.4701576851089613e-06, | |
| "loss": 0.6398, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 0.5544156105411386, | |
| "grad_norm": 3.6167470608574197, | |
| "learning_rate": 2.4666470218801113e-06, | |
| "loss": 0.5175, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.5548179440756387, | |
| "grad_norm": 5.070171032981664, | |
| "learning_rate": 2.463136424432542e-06, | |
| "loss": 0.4988, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 0.5552202776101388, | |
| "grad_norm": 4.058368062649386, | |
| "learning_rate": 2.4596258996901215e-06, | |
| "loss": 0.4313, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.555622611144639, | |
| "grad_norm": 3.9184186874819464, | |
| "learning_rate": 2.4561154545765724e-06, | |
| "loss": 0.5338, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 0.556024944679139, | |
| "grad_norm": 3.1244549306950784, | |
| "learning_rate": 2.4526050960154616e-06, | |
| "loss": 0.4289, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.5564272782136391, | |
| "grad_norm": 4.04400253855949, | |
| "learning_rate": 2.4490948309301867e-06, | |
| "loss": 0.5257, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 0.5568296117481392, | |
| "grad_norm": 3.9303715740181513, | |
| "learning_rate": 2.445584666243959e-06, | |
| "loss": 0.5149, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.5572319452826393, | |
| "grad_norm": 4.395655391522824, | |
| "learning_rate": 2.442074608879791e-06, | |
| "loss": 0.6249, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.5576342788171395, | |
| "grad_norm": 3.8268439749597576, | |
| "learning_rate": 2.438564665760486e-06, | |
| "loss": 0.5613, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.5580366123516395, | |
| "grad_norm": 4.255793020118025, | |
| "learning_rate": 2.4350548438086212e-06, | |
| "loss": 0.6155, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 0.5584389458861396, | |
| "grad_norm": 3.979739013550483, | |
| "learning_rate": 2.4315451499465336e-06, | |
| "loss": 0.5394, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.5588412794206397, | |
| "grad_norm": 3.781411432288367, | |
| "learning_rate": 2.4280355910963097e-06, | |
| "loss": 0.4507, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 0.5592436129551398, | |
| "grad_norm": 4.371781717934846, | |
| "learning_rate": 2.4245261741797663e-06, | |
| "loss": 0.5305, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.55964594648964, | |
| "grad_norm": 3.52895283713103, | |
| "learning_rate": 2.421016906118444e-06, | |
| "loss": 0.5194, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 0.56004828002414, | |
| "grad_norm": 3.872042926144624, | |
| "learning_rate": 2.417507793833587e-06, | |
| "loss": 0.4998, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.5604506135586401, | |
| "grad_norm": 3.391234256928363, | |
| "learning_rate": 2.413998844246136e-06, | |
| "loss": 0.4093, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 0.5608529470931403, | |
| "grad_norm": 3.9654243758806174, | |
| "learning_rate": 2.4104900642767066e-06, | |
| "loss": 0.4515, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.5612552806276403, | |
| "grad_norm": 3.5879075344069378, | |
| "learning_rate": 2.4069814608455804e-06, | |
| "loss": 0.5207, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.5616576141621404, | |
| "grad_norm": 4.104094820028554, | |
| "learning_rate": 2.4034730408726938e-06, | |
| "loss": 0.5625, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.5620599476966405, | |
| "grad_norm": 3.772298699595217, | |
| "learning_rate": 2.3999648112776183e-06, | |
| "loss": 0.5068, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 0.5624622812311406, | |
| "grad_norm": 3.688052093132043, | |
| "learning_rate": 2.3964567789795533e-06, | |
| "loss": 0.5042, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.5628646147656408, | |
| "grad_norm": 3.8931716258044498, | |
| "learning_rate": 2.3929489508973047e-06, | |
| "loss": 0.5485, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 0.5632669483001408, | |
| "grad_norm": 4.224728616738161, | |
| "learning_rate": 2.389441333949279e-06, | |
| "loss": 0.6582, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5636692818346409, | |
| "grad_norm": 3.8494878180062897, | |
| "learning_rate": 2.3859339350534646e-06, | |
| "loss": 0.5379, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 0.564071615369141, | |
| "grad_norm": 4.200325419085908, | |
| "learning_rate": 2.382426761127421e-06, | |
| "loss": 0.5997, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.5644739489036411, | |
| "grad_norm": 4.143252972394019, | |
| "learning_rate": 2.378919819088263e-06, | |
| "loss": 0.6195, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 0.5648762824381413, | |
| "grad_norm": 3.7174533902571145, | |
| "learning_rate": 2.3754131158526483e-06, | |
| "loss": 0.5317, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.5652786159726413, | |
| "grad_norm": 4.0056931921159356, | |
| "learning_rate": 2.371906658336764e-06, | |
| "loss": 0.5232, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.5656809495071414, | |
| "grad_norm": 3.830247393182765, | |
| "learning_rate": 2.368400453456313e-06, | |
| "loss": 0.4898, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.5660832830416416, | |
| "grad_norm": 3.7820185066794787, | |
| "learning_rate": 2.364894508126499e-06, | |
| "loss": 0.5438, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 0.5664856165761416, | |
| "grad_norm": 4.4898443665627, | |
| "learning_rate": 2.3613888292620135e-06, | |
| "loss": 0.6322, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.5668879501106417, | |
| "grad_norm": 4.157236186991902, | |
| "learning_rate": 2.3578834237770227e-06, | |
| "loss": 0.5978, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 0.5672902836451418, | |
| "grad_norm": 4.576188931404494, | |
| "learning_rate": 2.354378298585155e-06, | |
| "loss": 0.5774, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.5676926171796419, | |
| "grad_norm": 3.714487248907507, | |
| "learning_rate": 2.3508734605994855e-06, | |
| "loss": 0.4305, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 0.5680949507141421, | |
| "grad_norm": 3.9270791753173313, | |
| "learning_rate": 2.3473689167325205e-06, | |
| "loss": 0.4912, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.5684972842486421, | |
| "grad_norm": 3.655781362606588, | |
| "learning_rate": 2.3438646738961886e-06, | |
| "loss": 0.5125, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 0.5688996177831422, | |
| "grad_norm": 4.2097153394323845, | |
| "learning_rate": 2.3403607390018246e-06, | |
| "loss": 0.4273, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.5693019513176423, | |
| "grad_norm": 3.7418997500983, | |
| "learning_rate": 2.3368571189601556e-06, | |
| "loss": 0.4534, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.5697042848521424, | |
| "grad_norm": 3.7385152001909154, | |
| "learning_rate": 2.3333538206812874e-06, | |
| "loss": 0.4461, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.5701066183866426, | |
| "grad_norm": 4.243485694858217, | |
| "learning_rate": 2.32985085107469e-06, | |
| "loss": 0.4943, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 0.5705089519211426, | |
| "grad_norm": 4.064181274521891, | |
| "learning_rate": 2.3263482170491884e-06, | |
| "loss": 0.6696, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.5709112854556427, | |
| "grad_norm": 4.168819688893492, | |
| "learning_rate": 2.3228459255129425e-06, | |
| "loss": 0.5914, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 0.5713136189901429, | |
| "grad_norm": 3.299064883781313, | |
| "learning_rate": 2.3193439833734403e-06, | |
| "loss": 0.3987, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.5717159525246429, | |
| "grad_norm": 4.887635853989916, | |
| "learning_rate": 2.3158423975374757e-06, | |
| "loss": 0.4842, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 0.572118286059143, | |
| "grad_norm": 3.4644648967520806, | |
| "learning_rate": 2.312341174911145e-06, | |
| "loss": 0.4643, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.5725206195936431, | |
| "grad_norm": 3.75941250297541, | |
| "learning_rate": 2.3088403223998235e-06, | |
| "loss": 0.4772, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 0.5729229531281432, | |
| "grad_norm": 3.4615485090255578, | |
| "learning_rate": 2.3053398469081613e-06, | |
| "loss": 0.5075, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.5733252866626434, | |
| "grad_norm": 4.2227564973481995, | |
| "learning_rate": 2.30183975534006e-06, | |
| "loss": 0.5125, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.5737276201971434, | |
| "grad_norm": 4.090888673139273, | |
| "learning_rate": 2.298340054598668e-06, | |
| "loss": 0.5183, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.5741299537316435, | |
| "grad_norm": 4.548845256935699, | |
| "learning_rate": 2.294840751586361e-06, | |
| "loss": 0.639, | |
| "step": 1427 | |
| }, | |
| { | |
| "epoch": 0.5745322872661436, | |
| "grad_norm": 3.901263423192965, | |
| "learning_rate": 2.291341853204731e-06, | |
| "loss": 0.623, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.5749346208006437, | |
| "grad_norm": 3.347357658304439, | |
| "learning_rate": 2.287843366354571e-06, | |
| "loss": 0.4009, | |
| "step": 1429 | |
| }, | |
| { | |
| "epoch": 0.5753369543351439, | |
| "grad_norm": 4.512229941763247, | |
| "learning_rate": 2.284345297935863e-06, | |
| "loss": 0.6336, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.5757392878696439, | |
| "grad_norm": 4.132485335550154, | |
| "learning_rate": 2.2808476548477636e-06, | |
| "loss": 0.5015, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 0.576141621404144, | |
| "grad_norm": 4.226000440762799, | |
| "learning_rate": 2.2773504439885903e-06, | |
| "loss": 0.5063, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.5765439549386442, | |
| "grad_norm": 4.010604789353783, | |
| "learning_rate": 2.27385367225581e-06, | |
| "loss": 0.5085, | |
| "step": 1433 | |
| }, | |
| { | |
| "epoch": 0.5769462884731442, | |
| "grad_norm": 3.71838877160847, | |
| "learning_rate": 2.2703573465460194e-06, | |
| "loss": 0.483, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.5773486220076444, | |
| "grad_norm": 3.715720870136391, | |
| "learning_rate": 2.2668614737549395e-06, | |
| "loss": 0.5292, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.5777509555421444, | |
| "grad_norm": 3.7875519229722756, | |
| "learning_rate": 2.263366060777397e-06, | |
| "loss": 0.4504, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.5781532890766445, | |
| "grad_norm": 4.235567100677654, | |
| "learning_rate": 2.25987111450731e-06, | |
| "loss": 0.6075, | |
| "step": 1437 | |
| }, | |
| { | |
| "epoch": 0.5785556226111447, | |
| "grad_norm": 4.23015897289991, | |
| "learning_rate": 2.256376641837679e-06, | |
| "loss": 0.6036, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.5789579561456447, | |
| "grad_norm": 4.397376552533781, | |
| "learning_rate": 2.2528826496605672e-06, | |
| "loss": 0.6584, | |
| "step": 1439 | |
| }, | |
| { | |
| "epoch": 0.5793602896801449, | |
| "grad_norm": 3.987430088200103, | |
| "learning_rate": 2.249389144867092e-06, | |
| "loss": 0.5312, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.5797626232146449, | |
| "grad_norm": 4.349402058381357, | |
| "learning_rate": 2.24589613434741e-06, | |
| "loss": 0.4806, | |
| "step": 1441 | |
| }, | |
| { | |
| "epoch": 0.580164956749145, | |
| "grad_norm": 4.389627692568862, | |
| "learning_rate": 2.2424036249907032e-06, | |
| "loss": 0.649, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.5805672902836452, | |
| "grad_norm": 3.889422512107434, | |
| "learning_rate": 2.238911623685162e-06, | |
| "loss": 0.5553, | |
| "step": 1443 | |
| }, | |
| { | |
| "epoch": 0.5809696238181452, | |
| "grad_norm": 4.1471977764513674, | |
| "learning_rate": 2.235420137317978e-06, | |
| "loss": 0.5527, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.5813719573526454, | |
| "grad_norm": 3.642246289302764, | |
| "learning_rate": 2.2319291727753266e-06, | |
| "loss": 0.4669, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 0.5817742908871455, | |
| "grad_norm": 3.7186046973608406, | |
| "learning_rate": 2.228438736942354e-06, | |
| "loss": 0.3984, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 0.5821766244216455, | |
| "grad_norm": 4.208948818822306, | |
| "learning_rate": 2.2249488367031612e-06, | |
| "loss": 0.574, | |
| "step": 1447 | |
| }, | |
| { | |
| "epoch": 0.5825789579561457, | |
| "grad_norm": 3.4944920810310633, | |
| "learning_rate": 2.221459478940797e-06, | |
| "loss": 0.4565, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.5829812914906457, | |
| "grad_norm": 3.8209272903130325, | |
| "learning_rate": 2.2179706705372377e-06, | |
| "loss": 0.4116, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 0.5833836250251458, | |
| "grad_norm": 4.562727204828236, | |
| "learning_rate": 2.214482418373376e-06, | |
| "loss": 0.6272, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.583785958559646, | |
| "grad_norm": 3.986021082183279, | |
| "learning_rate": 2.21099472932901e-06, | |
| "loss": 0.4946, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 0.584188292094146, | |
| "grad_norm": 3.619941584357637, | |
| "learning_rate": 2.207507610282823e-06, | |
| "loss": 0.5552, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.5845906256286462, | |
| "grad_norm": 4.265275522318885, | |
| "learning_rate": 2.2040210681123773e-06, | |
| "loss": 0.5387, | |
| "step": 1453 | |
| }, | |
| { | |
| "epoch": 0.5849929591631462, | |
| "grad_norm": 3.592671313761321, | |
| "learning_rate": 2.2005351096940966e-06, | |
| "loss": 0.4221, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 0.5853952926976463, | |
| "grad_norm": 4.0275300106962755, | |
| "learning_rate": 2.1970497419032546e-06, | |
| "loss": 0.5781, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 0.5857976262321465, | |
| "grad_norm": 3.8884576976533425, | |
| "learning_rate": 2.1935649716139567e-06, | |
| "loss": 0.5121, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.5861999597666465, | |
| "grad_norm": 4.481785017500937, | |
| "learning_rate": 2.1900808056991335e-06, | |
| "loss": 0.6321, | |
| "step": 1457 | |
| }, | |
| { | |
| "epoch": 0.5866022933011467, | |
| "grad_norm": 3.74252328127819, | |
| "learning_rate": 2.186597251030521e-06, | |
| "loss": 0.4935, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 0.5870046268356468, | |
| "grad_norm": 3.9659714356438442, | |
| "learning_rate": 2.183114314478651e-06, | |
| "loss": 0.5503, | |
| "step": 1459 | |
| }, | |
| { | |
| "epoch": 0.5874069603701468, | |
| "grad_norm": 3.435746642131932, | |
| "learning_rate": 2.179632002912837e-06, | |
| "loss": 0.3508, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.587809293904647, | |
| "grad_norm": 4.32896011694038, | |
| "learning_rate": 2.176150323201157e-06, | |
| "loss": 0.5728, | |
| "step": 1461 | |
| }, | |
| { | |
| "epoch": 0.588211627439147, | |
| "grad_norm": 4.1710482850436215, | |
| "learning_rate": 2.1726692822104455e-06, | |
| "loss": 0.5115, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 0.5886139609736472, | |
| "grad_norm": 4.1244090573644145, | |
| "learning_rate": 2.169188886806277e-06, | |
| "loss": 0.3078, | |
| "step": 1463 | |
| }, | |
| { | |
| "epoch": 0.5890162945081473, | |
| "grad_norm": 3.462366030557245, | |
| "learning_rate": 2.1657091438529517e-06, | |
| "loss": 0.472, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.5894186280426473, | |
| "grad_norm": 3.920420130653373, | |
| "learning_rate": 2.1622300602134824e-06, | |
| "loss": 0.5559, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 0.5898209615771475, | |
| "grad_norm": 4.574556709525141, | |
| "learning_rate": 2.158751642749583e-06, | |
| "loss": 0.5525, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 0.5902232951116475, | |
| "grad_norm": 3.8284931733353513, | |
| "learning_rate": 2.155273898321653e-06, | |
| "loss": 0.5681, | |
| "step": 1467 | |
| }, | |
| { | |
| "epoch": 0.5906256286461476, | |
| "grad_norm": 3.3784102201483677, | |
| "learning_rate": 2.151796833788766e-06, | |
| "loss": 0.4727, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 0.5910279621806478, | |
| "grad_norm": 4.047681657794816, | |
| "learning_rate": 2.1483204560086507e-06, | |
| "loss": 0.5665, | |
| "step": 1469 | |
| }, | |
| { | |
| "epoch": 0.5914302957151478, | |
| "grad_norm": 4.308357381109115, | |
| "learning_rate": 2.144844771837685e-06, | |
| "loss": 0.6112, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.591832629249648, | |
| "grad_norm": 3.733873078989285, | |
| "learning_rate": 2.1413697881308785e-06, | |
| "loss": 0.494, | |
| "step": 1471 | |
| }, | |
| { | |
| "epoch": 0.5922349627841481, | |
| "grad_norm": 4.130337043839704, | |
| "learning_rate": 2.1378955117418567e-06, | |
| "loss": 0.445, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.5926372963186481, | |
| "grad_norm": 4.189878829812914, | |
| "learning_rate": 2.1344219495228537e-06, | |
| "loss": 0.569, | |
| "step": 1473 | |
| }, | |
| { | |
| "epoch": 0.5930396298531483, | |
| "grad_norm": 3.926746164109752, | |
| "learning_rate": 2.1309491083246915e-06, | |
| "loss": 0.5696, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 0.5934419633876483, | |
| "grad_norm": 3.688932516880435, | |
| "learning_rate": 2.127476994996772e-06, | |
| "loss": 0.5001, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.5938442969221485, | |
| "grad_norm": 4.084824375606503, | |
| "learning_rate": 2.1240056163870614e-06, | |
| "loss": 0.613, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 0.5942466304566486, | |
| "grad_norm": 4.614880824241214, | |
| "learning_rate": 2.120534979342078e-06, | |
| "loss": 0.5252, | |
| "step": 1477 | |
| }, | |
| { | |
| "epoch": 0.5946489639911486, | |
| "grad_norm": 3.0888633468235014, | |
| "learning_rate": 2.1170650907068742e-06, | |
| "loss": 0.4531, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 0.5950512975256488, | |
| "grad_norm": 4.213120019047452, | |
| "learning_rate": 2.113595957325029e-06, | |
| "loss": 0.6458, | |
| "step": 1479 | |
| }, | |
| { | |
| "epoch": 0.5954536310601488, | |
| "grad_norm": 3.610258712936895, | |
| "learning_rate": 2.1101275860386313e-06, | |
| "loss": 0.5652, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.595855964594649, | |
| "grad_norm": 3.901115632452379, | |
| "learning_rate": 2.1066599836882667e-06, | |
| "loss": 0.4731, | |
| "step": 1481 | |
| }, | |
| { | |
| "epoch": 0.5962582981291491, | |
| "grad_norm": 4.576321666746087, | |
| "learning_rate": 2.1031931571130053e-06, | |
| "loss": 0.6799, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 0.5966606316636491, | |
| "grad_norm": 4.188708168775149, | |
| "learning_rate": 2.0997271131503847e-06, | |
| "loss": 0.5172, | |
| "step": 1483 | |
| }, | |
| { | |
| "epoch": 0.5970629651981493, | |
| "grad_norm": 3.993460741506799, | |
| "learning_rate": 2.0962618586364005e-06, | |
| "loss": 0.4828, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 0.5974652987326494, | |
| "grad_norm": 3.977543353419007, | |
| "learning_rate": 2.092797400405493e-06, | |
| "loss": 0.5368, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 0.5978676322671495, | |
| "grad_norm": 4.079669745022429, | |
| "learning_rate": 2.0893337452905293e-06, | |
| "loss": 0.4879, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 0.5982699658016496, | |
| "grad_norm": 4.000645952876263, | |
| "learning_rate": 2.0858709001227924e-06, | |
| "loss": 0.5338, | |
| "step": 1487 | |
| }, | |
| { | |
| "epoch": 0.5986722993361496, | |
| "grad_norm": 4.483257438703314, | |
| "learning_rate": 2.0824088717319697e-06, | |
| "loss": 0.5771, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.5990746328706498, | |
| "grad_norm": 3.493611740579638, | |
| "learning_rate": 2.0789476669461375e-06, | |
| "loss": 0.4758, | |
| "step": 1489 | |
| }, | |
| { | |
| "epoch": 0.5994769664051499, | |
| "grad_norm": 3.9869817394957385, | |
| "learning_rate": 2.0754872925917465e-06, | |
| "loss": 0.5482, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.59987929993965, | |
| "grad_norm": 3.655771235240259, | |
| "learning_rate": 2.072027755493612e-06, | |
| "loss": 0.4924, | |
| "step": 1491 | |
| }, | |
| { | |
| "epoch": 0.6002816334741501, | |
| "grad_norm": 3.959985484700295, | |
| "learning_rate": 2.0685690624748943e-06, | |
| "loss": 0.5653, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 0.6006839670086501, | |
| "grad_norm": 3.4092513778358047, | |
| "learning_rate": 2.0651112203570904e-06, | |
| "loss": 0.4375, | |
| "step": 1493 | |
| }, | |
| { | |
| "epoch": 0.6010863005431503, | |
| "grad_norm": 4.050380522574125, | |
| "learning_rate": 2.061654235960021e-06, | |
| "loss": 0.5627, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 0.6014886340776504, | |
| "grad_norm": 4.329223167800351, | |
| "learning_rate": 2.058198116101814e-06, | |
| "loss": 0.5642, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 0.6018909676121504, | |
| "grad_norm": 3.475966176009881, | |
| "learning_rate": 2.0547428675988906e-06, | |
| "loss": 0.4417, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 0.6022933011466506, | |
| "grad_norm": 3.7228401404060443, | |
| "learning_rate": 2.051288497265955e-06, | |
| "loss": 0.4509, | |
| "step": 1497 | |
| }, | |
| { | |
| "epoch": 0.6026956346811507, | |
| "grad_norm": 3.9298800993895284, | |
| "learning_rate": 2.04783501191598e-06, | |
| "loss": 0.4373, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 0.6030979682156508, | |
| "grad_norm": 4.3431668642603105, | |
| "learning_rate": 2.0443824183601917e-06, | |
| "loss": 0.6361, | |
| "step": 1499 | |
| }, | |
| { | |
| "epoch": 0.6035003017501509, | |
| "grad_norm": 3.759176356037184, | |
| "learning_rate": 2.0409307234080577e-06, | |
| "loss": 0.4214, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6039026352846509, | |
| "grad_norm": 3.881662137011941, | |
| "learning_rate": 2.0374799338672723e-06, | |
| "loss": 0.4868, | |
| "step": 1501 | |
| }, | |
| { | |
| "epoch": 0.6043049688191511, | |
| "grad_norm": 4.786052455382573, | |
| "learning_rate": 2.034030056543747e-06, | |
| "loss": 0.6772, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 0.6047073023536512, | |
| "grad_norm": 4.137240403825967, | |
| "learning_rate": 2.0305810982415907e-06, | |
| "loss": 0.4981, | |
| "step": 1503 | |
| }, | |
| { | |
| "epoch": 0.6051096358881513, | |
| "grad_norm": 3.9245214475911947, | |
| "learning_rate": 2.0271330657631034e-06, | |
| "loss": 0.461, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.6055119694226514, | |
| "grad_norm": 4.043701387123113, | |
| "learning_rate": 2.023685965908755e-06, | |
| "loss": 0.4861, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.6059143029571514, | |
| "grad_norm": 4.578623188458431, | |
| "learning_rate": 2.020239805477179e-06, | |
| "loss": 0.5516, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 0.6063166364916516, | |
| "grad_norm": 3.939503557149013, | |
| "learning_rate": 2.0167945912651542e-06, | |
| "loss": 0.5348, | |
| "step": 1507 | |
| }, | |
| { | |
| "epoch": 0.6067189700261517, | |
| "grad_norm": 4.274904255142657, | |
| "learning_rate": 2.0133503300675963e-06, | |
| "loss": 0.5992, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 0.6071213035606517, | |
| "grad_norm": 3.732143003199723, | |
| "learning_rate": 2.0099070286775367e-06, | |
| "loss": 0.4891, | |
| "step": 1509 | |
| }, | |
| { | |
| "epoch": 0.6075236370951519, | |
| "grad_norm": 4.093400863247009, | |
| "learning_rate": 2.0064646938861175e-06, | |
| "loss": 0.4785, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.607925970629652, | |
| "grad_norm": 3.850637017187709, | |
| "learning_rate": 2.0030233324825734e-06, | |
| "loss": 0.5371, | |
| "step": 1511 | |
| }, | |
| { | |
| "epoch": 0.6083283041641521, | |
| "grad_norm": 3.726527595640385, | |
| "learning_rate": 1.999582951254219e-06, | |
| "loss": 0.4105, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.6087306376986522, | |
| "grad_norm": 4.0679286236770045, | |
| "learning_rate": 1.9961435569864366e-06, | |
| "loss": 0.524, | |
| "step": 1513 | |
| }, | |
| { | |
| "epoch": 0.6091329712331522, | |
| "grad_norm": 4.191017273712236, | |
| "learning_rate": 1.99270515646266e-06, | |
| "loss": 0.422, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 0.6095353047676524, | |
| "grad_norm": 4.549567455680612, | |
| "learning_rate": 1.9892677564643643e-06, | |
| "loss": 0.5753, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 0.6099376383021525, | |
| "grad_norm": 4.239380717200701, | |
| "learning_rate": 1.985831363771052e-06, | |
| "loss": 0.5251, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 0.6103399718366526, | |
| "grad_norm": 3.8382650524932433, | |
| "learning_rate": 1.9823959851602387e-06, | |
| "loss": 0.6749, | |
| "step": 1517 | |
| }, | |
| { | |
| "epoch": 0.6107423053711527, | |
| "grad_norm": 4.058628419689791, | |
| "learning_rate": 1.9789616274074375e-06, | |
| "loss": 0.5202, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 0.6111446389056527, | |
| "grad_norm": 4.1466696163681815, | |
| "learning_rate": 1.9755282972861515e-06, | |
| "loss": 0.4595, | |
| "step": 1519 | |
| }, | |
| { | |
| "epoch": 0.6115469724401529, | |
| "grad_norm": 4.193830162309673, | |
| "learning_rate": 1.9720960015678555e-06, | |
| "loss": 0.485, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.611949305974653, | |
| "grad_norm": 4.239153001501294, | |
| "learning_rate": 1.9686647470219835e-06, | |
| "loss": 0.6782, | |
| "step": 1521 | |
| }, | |
| { | |
| "epoch": 0.6123516395091531, | |
| "grad_norm": 4.138990574689833, | |
| "learning_rate": 1.965234540415918e-06, | |
| "loss": 0.6284, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 0.6127539730436532, | |
| "grad_norm": 4.079634170381014, | |
| "learning_rate": 1.9618053885149715e-06, | |
| "loss": 0.4656, | |
| "step": 1523 | |
| }, | |
| { | |
| "epoch": 0.6131563065781533, | |
| "grad_norm": 3.9398678794833204, | |
| "learning_rate": 1.9583772980823797e-06, | |
| "loss": 0.4345, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 0.6135586401126534, | |
| "grad_norm": 3.8999859116475237, | |
| "learning_rate": 1.9549502758792825e-06, | |
| "loss": 0.4463, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.6139609736471535, | |
| "grad_norm": 3.8236374179780652, | |
| "learning_rate": 1.9515243286647155e-06, | |
| "loss": 0.5032, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 0.6143633071816536, | |
| "grad_norm": 3.9105069856639787, | |
| "learning_rate": 1.9480994631955904e-06, | |
| "loss": 0.5249, | |
| "step": 1527 | |
| }, | |
| { | |
| "epoch": 0.6147656407161537, | |
| "grad_norm": 4.090697174474606, | |
| "learning_rate": 1.944675686226688e-06, | |
| "loss": 0.5986, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 0.6151679742506538, | |
| "grad_norm": 4.026970204591501, | |
| "learning_rate": 1.941253004510642e-06, | |
| "loss": 0.5695, | |
| "step": 1529 | |
| }, | |
| { | |
| "epoch": 0.6155703077851539, | |
| "grad_norm": 3.649099842703245, | |
| "learning_rate": 1.937831424797926e-06, | |
| "loss": 0.436, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.615972641319654, | |
| "grad_norm": 3.8376284628790773, | |
| "learning_rate": 1.9344109538368388e-06, | |
| "loss": 0.4986, | |
| "step": 1531 | |
| }, | |
| { | |
| "epoch": 0.616374974854154, | |
| "grad_norm": 4.048438236221887, | |
| "learning_rate": 1.9309915983734933e-06, | |
| "loss": 0.5608, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 0.6167773083886542, | |
| "grad_norm": 4.036284880877361, | |
| "learning_rate": 1.9275733651518038e-06, | |
| "loss": 0.552, | |
| "step": 1533 | |
| }, | |
| { | |
| "epoch": 0.6171796419231543, | |
| "grad_norm": 4.098386930913388, | |
| "learning_rate": 1.9241562609134694e-06, | |
| "loss": 0.5564, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 0.6175819754576544, | |
| "grad_norm": 4.4349116098990065, | |
| "learning_rate": 1.9207402923979633e-06, | |
| "loss": 0.5937, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 0.6179843089921545, | |
| "grad_norm": 3.9955554180753556, | |
| "learning_rate": 1.9173254663425165e-06, | |
| "loss": 0.4773, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.6183866425266547, | |
| "grad_norm": 3.7895980004495553, | |
| "learning_rate": 1.9139117894821107e-06, | |
| "loss": 0.5117, | |
| "step": 1537 | |
| }, | |
| { | |
| "epoch": 0.6187889760611547, | |
| "grad_norm": 4.503044474467485, | |
| "learning_rate": 1.910499268549459e-06, | |
| "loss": 0.5728, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 0.6191913095956548, | |
| "grad_norm": 4.5048282169496865, | |
| "learning_rate": 1.9070879102749944e-06, | |
| "loss": 0.4952, | |
| "step": 1539 | |
| }, | |
| { | |
| "epoch": 0.6195936431301549, | |
| "grad_norm": 3.6152226921367516, | |
| "learning_rate": 1.9036777213868567e-06, | |
| "loss": 0.4901, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.619995976664655, | |
| "grad_norm": 4.56665728701204, | |
| "learning_rate": 1.9002687086108808e-06, | |
| "loss": 0.6217, | |
| "step": 1541 | |
| }, | |
| { | |
| "epoch": 0.6203983101991551, | |
| "grad_norm": 3.8747261446432972, | |
| "learning_rate": 1.8968608786705803e-06, | |
| "loss": 0.4346, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 0.6208006437336552, | |
| "grad_norm": 3.553549231891504, | |
| "learning_rate": 1.8934542382871365e-06, | |
| "loss": 0.4086, | |
| "step": 1543 | |
| }, | |
| { | |
| "epoch": 0.6212029772681553, | |
| "grad_norm": 4.273007467466175, | |
| "learning_rate": 1.8900487941793862e-06, | |
| "loss": 0.5589, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 0.6216053108026554, | |
| "grad_norm": 4.033041498302765, | |
| "learning_rate": 1.8866445530638034e-06, | |
| "loss": 0.5258, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 0.6220076443371555, | |
| "grad_norm": 3.847764801262625, | |
| "learning_rate": 1.883241521654492e-06, | |
| "loss": 0.5286, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 0.6224099778716556, | |
| "grad_norm": 4.141005318996514, | |
| "learning_rate": 1.87983970666317e-06, | |
| "loss": 0.5428, | |
| "step": 1547 | |
| }, | |
| { | |
| "epoch": 0.6228123114061557, | |
| "grad_norm": 3.8545860722248286, | |
| "learning_rate": 1.8764391147991556e-06, | |
| "loss": 0.4504, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 0.6232146449406558, | |
| "grad_norm": 3.9064270159769543, | |
| "learning_rate": 1.8730397527693544e-06, | |
| "loss": 0.49, | |
| "step": 1549 | |
| }, | |
| { | |
| "epoch": 0.623616978475156, | |
| "grad_norm": 3.5305364890500903, | |
| "learning_rate": 1.8696416272782467e-06, | |
| "loss": 0.4747, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.624019312009656, | |
| "grad_norm": 4.367606237537799, | |
| "learning_rate": 1.866244745027874e-06, | |
| "loss": 0.6066, | |
| "step": 1551 | |
| }, | |
| { | |
| "epoch": 0.6244216455441561, | |
| "grad_norm": 3.778565429919078, | |
| "learning_rate": 1.8628491127178272e-06, | |
| "loss": 0.4401, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 0.6248239790786562, | |
| "grad_norm": 4.4484086477744675, | |
| "learning_rate": 1.8594547370452288e-06, | |
| "loss": 0.5702, | |
| "step": 1553 | |
| }, | |
| { | |
| "epoch": 0.6252263126131563, | |
| "grad_norm": 4.133384847499752, | |
| "learning_rate": 1.8560616247047253e-06, | |
| "loss": 0.4958, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 0.6256286461476565, | |
| "grad_norm": 4.481750156438252, | |
| "learning_rate": 1.8526697823884721e-06, | |
| "loss": 0.5982, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 0.6260309796821565, | |
| "grad_norm": 4.679844131200749, | |
| "learning_rate": 1.849279216786117e-06, | |
| "loss": 0.5699, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 0.6264333132166566, | |
| "grad_norm": 4.035894316525195, | |
| "learning_rate": 1.8458899345847936e-06, | |
| "loss": 0.6036, | |
| "step": 1557 | |
| }, | |
| { | |
| "epoch": 0.6268356467511567, | |
| "grad_norm": 3.3784184252716836, | |
| "learning_rate": 1.8425019424690993e-06, | |
| "loss": 0.3667, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 0.6272379802856568, | |
| "grad_norm": 3.7155353324243325, | |
| "learning_rate": 1.839115247121091e-06, | |
| "loss": 0.4744, | |
| "step": 1559 | |
| }, | |
| { | |
| "epoch": 0.627640313820157, | |
| "grad_norm": 4.062605549457995, | |
| "learning_rate": 1.8357298552202676e-06, | |
| "loss": 0.4846, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.628042647354657, | |
| "grad_norm": 3.968579655925073, | |
| "learning_rate": 1.832345773443557e-06, | |
| "loss": 0.4563, | |
| "step": 1561 | |
| }, | |
| { | |
| "epoch": 0.6284449808891571, | |
| "grad_norm": 3.859404877482336, | |
| "learning_rate": 1.8289630084653009e-06, | |
| "loss": 0.5339, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 0.6288473144236573, | |
| "grad_norm": 4.32493582991546, | |
| "learning_rate": 1.825581566957247e-06, | |
| "loss": 0.5206, | |
| "step": 1563 | |
| }, | |
| { | |
| "epoch": 0.6292496479581573, | |
| "grad_norm": 3.7620622738243474, | |
| "learning_rate": 1.8222014555885303e-06, | |
| "loss": 0.5187, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 0.6296519814926574, | |
| "grad_norm": 3.9245871423313616, | |
| "learning_rate": 1.8188226810256647e-06, | |
| "loss": 0.5302, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.6300543150271575, | |
| "grad_norm": 4.0730169054317305, | |
| "learning_rate": 1.8154452499325273e-06, | |
| "loss": 0.482, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 0.6304566485616576, | |
| "grad_norm": 4.0887356034823314, | |
| "learning_rate": 1.8120691689703423e-06, | |
| "loss": 0.6323, | |
| "step": 1567 | |
| }, | |
| { | |
| "epoch": 0.6308589820961578, | |
| "grad_norm": 4.134013360882183, | |
| "learning_rate": 1.8086944447976745e-06, | |
| "loss": 0.4215, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.6312613156306578, | |
| "grad_norm": 4.267968482401992, | |
| "learning_rate": 1.805321084070412e-06, | |
| "loss": 0.6421, | |
| "step": 1569 | |
| }, | |
| { | |
| "epoch": 0.6316636491651579, | |
| "grad_norm": 4.337432367486679, | |
| "learning_rate": 1.8019490934417533e-06, | |
| "loss": 0.614, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.632065982699658, | |
| "grad_norm": 4.325604571245914, | |
| "learning_rate": 1.7985784795621946e-06, | |
| "loss": 0.6082, | |
| "step": 1571 | |
| }, | |
| { | |
| "epoch": 0.6324683162341581, | |
| "grad_norm": 4.7255506567581005, | |
| "learning_rate": 1.7952092490795165e-06, | |
| "loss": 0.5848, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 0.6328706497686583, | |
| "grad_norm": 3.9917358158112513, | |
| "learning_rate": 1.7918414086387723e-06, | |
| "loss": 0.51, | |
| "step": 1573 | |
| }, | |
| { | |
| "epoch": 0.6332729833031583, | |
| "grad_norm": 3.9759190309543806, | |
| "learning_rate": 1.788474964882273e-06, | |
| "loss": 0.4348, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 0.6336753168376584, | |
| "grad_norm": 3.9060781229407673, | |
| "learning_rate": 1.7851099244495761e-06, | |
| "loss": 0.5328, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.6340776503721586, | |
| "grad_norm": 3.794629121755811, | |
| "learning_rate": 1.7817462939774683e-06, | |
| "loss": 0.5406, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 0.6344799839066586, | |
| "grad_norm": 4.434204491241485, | |
| "learning_rate": 1.778384080099959e-06, | |
| "loss": 0.6955, | |
| "step": 1577 | |
| }, | |
| { | |
| "epoch": 0.6348823174411587, | |
| "grad_norm": 4.243047298973574, | |
| "learning_rate": 1.775023289448261e-06, | |
| "loss": 0.6021, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 0.6352846509756588, | |
| "grad_norm": 3.906621293593377, | |
| "learning_rate": 1.7716639286507834e-06, | |
| "loss": 0.5017, | |
| "step": 1579 | |
| }, | |
| { | |
| "epoch": 0.6356869845101589, | |
| "grad_norm": 4.433075604901836, | |
| "learning_rate": 1.7683060043331102e-06, | |
| "loss": 0.6304, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.6360893180446591, | |
| "grad_norm": 4.0324680233039425, | |
| "learning_rate": 1.764949523117997e-06, | |
| "loss": 0.4593, | |
| "step": 1581 | |
| }, | |
| { | |
| "epoch": 0.6364916515791591, | |
| "grad_norm": 4.861200946299266, | |
| "learning_rate": 1.7615944916253511e-06, | |
| "loss": 0.5755, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 0.6368939851136592, | |
| "grad_norm": 3.4859779578870462, | |
| "learning_rate": 1.7582409164722219e-06, | |
| "loss": 0.4494, | |
| "step": 1583 | |
| }, | |
| { | |
| "epoch": 0.6372963186481593, | |
| "grad_norm": 4.394364956739334, | |
| "learning_rate": 1.7548888042727835e-06, | |
| "loss": 0.5756, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 0.6376986521826594, | |
| "grad_norm": 4.474860851761582, | |
| "learning_rate": 1.751538161638327e-06, | |
| "loss": 0.5217, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 0.6381009857171596, | |
| "grad_norm": 4.665357328027418, | |
| "learning_rate": 1.748188995177245e-06, | |
| "loss": 0.5599, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 0.6385033192516596, | |
| "grad_norm": 3.186769209343777, | |
| "learning_rate": 1.744841311495019e-06, | |
| "loss": 0.3409, | |
| "step": 1587 | |
| }, | |
| { | |
| "epoch": 0.6389056527861597, | |
| "grad_norm": 3.3506177658392744, | |
| "learning_rate": 1.7414951171942057e-06, | |
| "loss": 0.5176, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 0.6393079863206599, | |
| "grad_norm": 3.4136910306135935, | |
| "learning_rate": 1.7381504188744218e-06, | |
| "loss": 0.4464, | |
| "step": 1589 | |
| }, | |
| { | |
| "epoch": 0.6397103198551599, | |
| "grad_norm": 3.4917018490110467, | |
| "learning_rate": 1.7348072231323382e-06, | |
| "loss": 0.438, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.6401126533896601, | |
| "grad_norm": 3.421803275946972, | |
| "learning_rate": 1.7314655365616584e-06, | |
| "loss": 0.4189, | |
| "step": 1591 | |
| }, | |
| { | |
| "epoch": 0.6405149869241601, | |
| "grad_norm": 4.433987296079437, | |
| "learning_rate": 1.7281253657531123e-06, | |
| "loss": 0.606, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 0.6409173204586602, | |
| "grad_norm": 3.6737703307949983, | |
| "learning_rate": 1.7247867172944367e-06, | |
| "loss": 0.4349, | |
| "step": 1593 | |
| }, | |
| { | |
| "epoch": 0.6413196539931604, | |
| "grad_norm": 3.8221357123925914, | |
| "learning_rate": 1.721449597770369e-06, | |
| "loss": 0.484, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 0.6417219875276604, | |
| "grad_norm": 4.069693321778052, | |
| "learning_rate": 1.718114013762631e-06, | |
| "loss": 0.6507, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 0.6421243210621606, | |
| "grad_norm": 4.268934323506783, | |
| "learning_rate": 1.714779971849915e-06, | |
| "loss": 0.498, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 0.6425266545966606, | |
| "grad_norm": 4.426082622301671, | |
| "learning_rate": 1.7114474786078716e-06, | |
| "loss": 0.6755, | |
| "step": 1597 | |
| }, | |
| { | |
| "epoch": 0.6429289881311607, | |
| "grad_norm": 3.6655062613985288, | |
| "learning_rate": 1.708116540609098e-06, | |
| "loss": 0.5078, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 0.6433313216656609, | |
| "grad_norm": 4.096742196755438, | |
| "learning_rate": 1.7047871644231226e-06, | |
| "loss": 0.5085, | |
| "step": 1599 | |
| }, | |
| { | |
| "epoch": 0.6437336552001609, | |
| "grad_norm": 4.341802218643631, | |
| "learning_rate": 1.7014593566163949e-06, | |
| "loss": 0.6022, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.644135988734661, | |
| "grad_norm": 4.167303639683251, | |
| "learning_rate": 1.6981331237522716e-06, | |
| "loss": 0.595, | |
| "step": 1601 | |
| }, | |
| { | |
| "epoch": 0.6445383222691611, | |
| "grad_norm": 3.860753980307849, | |
| "learning_rate": 1.6948084723910002e-06, | |
| "loss": 0.4189, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 0.6449406558036612, | |
| "grad_norm": 3.801435117916446, | |
| "learning_rate": 1.6914854090897124e-06, | |
| "loss": 0.4691, | |
| "step": 1603 | |
| }, | |
| { | |
| "epoch": 0.6453429893381614, | |
| "grad_norm": 4.309844638484278, | |
| "learning_rate": 1.6881639404024063e-06, | |
| "loss": 0.4802, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 0.6457453228726614, | |
| "grad_norm": 3.8218645060402285, | |
| "learning_rate": 1.6848440728799346e-06, | |
| "loss": 0.6087, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 0.6461476564071615, | |
| "grad_norm": 4.318515117319728, | |
| "learning_rate": 1.6815258130699938e-06, | |
| "loss": 0.6644, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 0.6465499899416617, | |
| "grad_norm": 3.8706029461346114, | |
| "learning_rate": 1.6782091675171064e-06, | |
| "loss": 0.5314, | |
| "step": 1607 | |
| }, | |
| { | |
| "epoch": 0.6469523234761617, | |
| "grad_norm": 4.588786654868146, | |
| "learning_rate": 1.6748941427626142e-06, | |
| "loss": 0.5713, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 0.6473546570106619, | |
| "grad_norm": 4.206226833574013, | |
| "learning_rate": 1.671580745344661e-06, | |
| "loss": 0.5207, | |
| "step": 1609 | |
| }, | |
| { | |
| "epoch": 0.6477569905451619, | |
| "grad_norm": 4.1241193689590565, | |
| "learning_rate": 1.6682689817981827e-06, | |
| "loss": 0.5325, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.648159324079662, | |
| "grad_norm": 4.013331656155903, | |
| "learning_rate": 1.6649588586548888e-06, | |
| "loss": 0.5593, | |
| "step": 1611 | |
| }, | |
| { | |
| "epoch": 0.6485616576141622, | |
| "grad_norm": 4.701503684635364, | |
| "learning_rate": 1.6616503824432573e-06, | |
| "loss": 0.6878, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 0.6489639911486622, | |
| "grad_norm": 4.047494584818948, | |
| "learning_rate": 1.658343559688516e-06, | |
| "loss": 0.5131, | |
| "step": 1613 | |
| }, | |
| { | |
| "epoch": 0.6493663246831624, | |
| "grad_norm": 4.0317139401208575, | |
| "learning_rate": 1.6550383969126341e-06, | |
| "loss": 0.512, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 0.6497686582176624, | |
| "grad_norm": 3.9682317404566034, | |
| "learning_rate": 1.6517349006343025e-06, | |
| "loss": 0.5921, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 0.6501709917521625, | |
| "grad_norm": 3.824331386002604, | |
| "learning_rate": 1.6484330773689292e-06, | |
| "loss": 0.4712, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 0.6505733252866627, | |
| "grad_norm": 3.7019715035054204, | |
| "learning_rate": 1.645132933628621e-06, | |
| "loss": 0.4426, | |
| "step": 1617 | |
| }, | |
| { | |
| "epoch": 0.6509756588211627, | |
| "grad_norm": 4.0459494986124245, | |
| "learning_rate": 1.6418344759221734e-06, | |
| "loss": 0.6728, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 0.6513779923556628, | |
| "grad_norm": 4.154812270453969, | |
| "learning_rate": 1.6385377107550543e-06, | |
| "loss": 0.559, | |
| "step": 1619 | |
| }, | |
| { | |
| "epoch": 0.651780325890163, | |
| "grad_norm": 4.703387446268752, | |
| "learning_rate": 1.6352426446293952e-06, | |
| "loss": 0.5548, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.652182659424663, | |
| "grad_norm": 3.894555312354832, | |
| "learning_rate": 1.631949284043976e-06, | |
| "loss": 0.4987, | |
| "step": 1621 | |
| }, | |
| { | |
| "epoch": 0.6525849929591632, | |
| "grad_norm": 4.281016820029128, | |
| "learning_rate": 1.628657635494213e-06, | |
| "loss": 0.6007, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 0.6529873264936632, | |
| "grad_norm": 3.708021853167261, | |
| "learning_rate": 1.625367705472147e-06, | |
| "loss": 0.5041, | |
| "step": 1623 | |
| }, | |
| { | |
| "epoch": 0.6533896600281633, | |
| "grad_norm": 4.184667272519736, | |
| "learning_rate": 1.6220795004664264e-06, | |
| "loss": 0.563, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 0.6537919935626635, | |
| "grad_norm": 4.386544791097414, | |
| "learning_rate": 1.6187930269623001e-06, | |
| "loss": 0.6591, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.6541943270971635, | |
| "grad_norm": 4.095997806253382, | |
| "learning_rate": 1.6155082914416018e-06, | |
| "loss": 0.5122, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 0.6545966606316637, | |
| "grad_norm": 3.97582656513418, | |
| "learning_rate": 1.6122253003827353e-06, | |
| "loss": 0.5805, | |
| "step": 1627 | |
| }, | |
| { | |
| "epoch": 0.6549989941661637, | |
| "grad_norm": 3.990901166079429, | |
| "learning_rate": 1.6089440602606675e-06, | |
| "loss": 0.569, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 0.6554013277006638, | |
| "grad_norm": 3.8624368116988403, | |
| "learning_rate": 1.6056645775469077e-06, | |
| "loss": 0.4729, | |
| "step": 1629 | |
| }, | |
| { | |
| "epoch": 0.655803661235164, | |
| "grad_norm": 4.529362920268054, | |
| "learning_rate": 1.6023868587095016e-06, | |
| "loss": 0.5482, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.656205994769664, | |
| "grad_norm": 4.017565552354934, | |
| "learning_rate": 1.599110910213016e-06, | |
| "loss": 0.4916, | |
| "step": 1631 | |
| }, | |
| { | |
| "epoch": 0.6566083283041642, | |
| "grad_norm": 4.4182156313212575, | |
| "learning_rate": 1.595836738518527e-06, | |
| "loss": 0.5792, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 0.6570106618386643, | |
| "grad_norm": 3.5677962985553524, | |
| "learning_rate": 1.592564350083603e-06, | |
| "loss": 0.4697, | |
| "step": 1633 | |
| }, | |
| { | |
| "epoch": 0.6574129953731643, | |
| "grad_norm": 3.909711067921425, | |
| "learning_rate": 1.5892937513622977e-06, | |
| "loss": 0.4383, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 0.6578153289076645, | |
| "grad_norm": 4.098825788465001, | |
| "learning_rate": 1.5860249488051355e-06, | |
| "loss": 0.4745, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 0.6582176624421645, | |
| "grad_norm": 3.2004894026603616, | |
| "learning_rate": 1.5827579488590977e-06, | |
| "loss": 0.4775, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 0.6586199959766647, | |
| "grad_norm": 3.970341598297501, | |
| "learning_rate": 1.5794927579676089e-06, | |
| "loss": 0.5471, | |
| "step": 1637 | |
| }, | |
| { | |
| "epoch": 0.6590223295111648, | |
| "grad_norm": 4.241718162619275, | |
| "learning_rate": 1.5762293825705276e-06, | |
| "loss": 0.6544, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 0.6594246630456648, | |
| "grad_norm": 3.924819583311759, | |
| "learning_rate": 1.5729678291041318e-06, | |
| "loss": 0.5368, | |
| "step": 1639 | |
| }, | |
| { | |
| "epoch": 0.659826996580165, | |
| "grad_norm": 3.8382551527997397, | |
| "learning_rate": 1.5697081040011043e-06, | |
| "loss": 0.5121, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.660229330114665, | |
| "grad_norm": 3.6682266397110337, | |
| "learning_rate": 1.5664502136905236e-06, | |
| "loss": 0.4353, | |
| "step": 1641 | |
| }, | |
| { | |
| "epoch": 0.6606316636491651, | |
| "grad_norm": 3.9060845453518143, | |
| "learning_rate": 1.563194164597848e-06, | |
| "loss": 0.4814, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 0.6610339971836653, | |
| "grad_norm": 3.985541657912861, | |
| "learning_rate": 1.5599399631449052e-06, | |
| "loss": 0.4643, | |
| "step": 1643 | |
| }, | |
| { | |
| "epoch": 0.6614363307181653, | |
| "grad_norm": 4.246558106503365, | |
| "learning_rate": 1.5566876157498797e-06, | |
| "loss": 0.5457, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 0.6618386642526655, | |
| "grad_norm": 4.538356504223212, | |
| "learning_rate": 1.5534371288272992e-06, | |
| "loss": 0.5586, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.6622409977871656, | |
| "grad_norm": 3.512817123486096, | |
| "learning_rate": 1.5501885087880187e-06, | |
| "loss": 0.3777, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 0.6626433313216656, | |
| "grad_norm": 3.611315735282059, | |
| "learning_rate": 1.546941762039216e-06, | |
| "loss": 0.4559, | |
| "step": 1647 | |
| }, | |
| { | |
| "epoch": 0.6630456648561658, | |
| "grad_norm": 3.804900233889627, | |
| "learning_rate": 1.5436968949843705e-06, | |
| "loss": 0.5593, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 0.6634479983906658, | |
| "grad_norm": 4.290281020900778, | |
| "learning_rate": 1.5404539140232572e-06, | |
| "loss": 0.5433, | |
| "step": 1649 | |
| }, | |
| { | |
| "epoch": 0.663850331925166, | |
| "grad_norm": 4.261399800624298, | |
| "learning_rate": 1.5372128255519298e-06, | |
| "loss": 0.5669, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.6642526654596661, | |
| "grad_norm": 4.8206169191275094, | |
| "learning_rate": 1.533973635962708e-06, | |
| "loss": 0.535, | |
| "step": 1651 | |
| }, | |
| { | |
| "epoch": 0.6646549989941661, | |
| "grad_norm": 3.5705866846183882, | |
| "learning_rate": 1.5307363516441693e-06, | |
| "loss": 0.4436, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 0.6650573325286663, | |
| "grad_norm": 4.015284104793611, | |
| "learning_rate": 1.5275009789811317e-06, | |
| "loss": 0.4606, | |
| "step": 1653 | |
| }, | |
| { | |
| "epoch": 0.6654596660631663, | |
| "grad_norm": 3.746888831379427, | |
| "learning_rate": 1.5242675243546435e-06, | |
| "loss": 0.5344, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 0.6658619995976665, | |
| "grad_norm": 5.350374969998737, | |
| "learning_rate": 1.5210359941419698e-06, | |
| "loss": 0.7187, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 0.6662643331321666, | |
| "grad_norm": 4.992414873463243, | |
| "learning_rate": 1.5178063947165795e-06, | |
| "loss": 0.5597, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 3.9536138723501413, | |
| "learning_rate": 1.514578732448135e-06, | |
| "loss": 0.5345, | |
| "step": 1657 | |
| }, | |
| { | |
| "epoch": 0.6670690002011668, | |
| "grad_norm": 4.023274067891729, | |
| "learning_rate": 1.5113530137024774e-06, | |
| "loss": 0.4893, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 0.6674713337356669, | |
| "grad_norm": 4.108018429855622, | |
| "learning_rate": 1.5081292448416158e-06, | |
| "loss": 0.645, | |
| "step": 1659 | |
| }, | |
| { | |
| "epoch": 0.667873667270167, | |
| "grad_norm": 3.817031046521698, | |
| "learning_rate": 1.5049074322237097e-06, | |
| "loss": 0.4838, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.6682760008046671, | |
| "grad_norm": 4.10734795999481, | |
| "learning_rate": 1.5016875822030651e-06, | |
| "loss": 0.5853, | |
| "step": 1661 | |
| }, | |
| { | |
| "epoch": 0.6686783343391671, | |
| "grad_norm": 4.104736058188134, | |
| "learning_rate": 1.4984697011301142e-06, | |
| "loss": 0.5032, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 0.6690806678736673, | |
| "grad_norm": 4.213736489738209, | |
| "learning_rate": 1.4952537953514078e-06, | |
| "loss": 0.4234, | |
| "step": 1663 | |
| }, | |
| { | |
| "epoch": 0.6694830014081674, | |
| "grad_norm": 4.592368631770386, | |
| "learning_rate": 1.4920398712095985e-06, | |
| "loss": 0.5323, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.6698853349426674, | |
| "grad_norm": 4.593839464020305, | |
| "learning_rate": 1.4888279350434325e-06, | |
| "loss": 0.5821, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 0.6702876684771676, | |
| "grad_norm": 3.881133737659202, | |
| "learning_rate": 1.485617993187735e-06, | |
| "loss": 0.5224, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 0.6706900020116676, | |
| "grad_norm": 4.088068946795046, | |
| "learning_rate": 1.4824100519733985e-06, | |
| "loss": 0.556, | |
| "step": 1667 | |
| }, | |
| { | |
| "epoch": 0.6710923355461678, | |
| "grad_norm": 4.328495324161391, | |
| "learning_rate": 1.4792041177273668e-06, | |
| "loss": 0.6097, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 0.6714946690806679, | |
| "grad_norm": 4.220316709489982, | |
| "learning_rate": 1.4760001967726283e-06, | |
| "loss": 0.6015, | |
| "step": 1669 | |
| }, | |
| { | |
| "epoch": 0.6718970026151679, | |
| "grad_norm": 3.788396547796328, | |
| "learning_rate": 1.472798295428199e-06, | |
| "loss": 0.5863, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.6722993361496681, | |
| "grad_norm": 3.8086553750558494, | |
| "learning_rate": 1.4695984200091129e-06, | |
| "loss": 0.5155, | |
| "step": 1671 | |
| }, | |
| { | |
| "epoch": 0.6727016696841682, | |
| "grad_norm": 3.792467737324882, | |
| "learning_rate": 1.4664005768264084e-06, | |
| "loss": 0.4517, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 0.6731040032186683, | |
| "grad_norm": 4.528886592192105, | |
| "learning_rate": 1.4632047721871134e-06, | |
| "loss": 0.5514, | |
| "step": 1673 | |
| }, | |
| { | |
| "epoch": 0.6735063367531684, | |
| "grad_norm": 3.687134247150264, | |
| "learning_rate": 1.4600110123942376e-06, | |
| "loss": 0.5765, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 0.6739086702876684, | |
| "grad_norm": 3.651752055706122, | |
| "learning_rate": 1.4568193037467574e-06, | |
| "loss": 0.4434, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.6743110038221686, | |
| "grad_norm": 4.196555677667497, | |
| "learning_rate": 1.453629652539604e-06, | |
| "loss": 0.5832, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 0.6747133373566687, | |
| "grad_norm": 3.3768494467370047, | |
| "learning_rate": 1.4504420650636485e-06, | |
| "loss": 0.4094, | |
| "step": 1677 | |
| }, | |
| { | |
| "epoch": 0.6751156708911688, | |
| "grad_norm": 3.9350013310511582, | |
| "learning_rate": 1.4472565476056952e-06, | |
| "loss": 0.4875, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 0.6755180044256689, | |
| "grad_norm": 3.9364895166754383, | |
| "learning_rate": 1.444073106448462e-06, | |
| "loss": 0.626, | |
| "step": 1679 | |
| }, | |
| { | |
| "epoch": 0.6759203379601689, | |
| "grad_norm": 4.218774138421791, | |
| "learning_rate": 1.4408917478705748e-06, | |
| "loss": 0.5318, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.6763226714946691, | |
| "grad_norm": 3.557852927029882, | |
| "learning_rate": 1.4377124781465517e-06, | |
| "loss": 0.4564, | |
| "step": 1681 | |
| }, | |
| { | |
| "epoch": 0.6767250050291692, | |
| "grad_norm": 4.131292301591179, | |
| "learning_rate": 1.4345353035467888e-06, | |
| "loss": 0.5458, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 0.6771273385636692, | |
| "grad_norm": 4.261120314757306, | |
| "learning_rate": 1.4313602303375523e-06, | |
| "loss": 0.6402, | |
| "step": 1683 | |
| }, | |
| { | |
| "epoch": 0.6775296720981694, | |
| "grad_norm": 4.027439558487913, | |
| "learning_rate": 1.4281872647809628e-06, | |
| "loss": 0.5762, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 0.6779320056326695, | |
| "grad_norm": 3.9491401859678605, | |
| "learning_rate": 1.4250164131349858e-06, | |
| "loss": 0.4807, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.6783343391671696, | |
| "grad_norm": 3.60940986670999, | |
| "learning_rate": 1.421847681653414e-06, | |
| "loss": 0.3659, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 0.6787366727016697, | |
| "grad_norm": 3.5822407819650515, | |
| "learning_rate": 1.4186810765858616e-06, | |
| "loss": 0.5165, | |
| "step": 1687 | |
| }, | |
| { | |
| "epoch": 0.6791390062361697, | |
| "grad_norm": 4.161182758555724, | |
| "learning_rate": 1.4155166041777483e-06, | |
| "loss": 0.495, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 0.6795413397706699, | |
| "grad_norm": 4.076431248542256, | |
| "learning_rate": 1.4123542706702868e-06, | |
| "loss": 0.4569, | |
| "step": 1689 | |
| }, | |
| { | |
| "epoch": 0.67994367330517, | |
| "grad_norm": 3.401944784975411, | |
| "learning_rate": 1.409194082300473e-06, | |
| "loss": 0.4661, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.6803460068396701, | |
| "grad_norm": 4.053896920545759, | |
| "learning_rate": 1.4060360453010691e-06, | |
| "loss": 0.6945, | |
| "step": 1691 | |
| }, | |
| { | |
| "epoch": 0.6807483403741702, | |
| "grad_norm": 3.6315876209619726, | |
| "learning_rate": 1.4028801659005975e-06, | |
| "loss": 0.5088, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 0.6811506739086702, | |
| "grad_norm": 3.9033263695876617, | |
| "learning_rate": 1.3997264503233222e-06, | |
| "loss": 0.5535, | |
| "step": 1693 | |
| }, | |
| { | |
| "epoch": 0.6815530074431704, | |
| "grad_norm": 3.6520508454132568, | |
| "learning_rate": 1.3965749047892421e-06, | |
| "loss": 0.4627, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 0.6819553409776705, | |
| "grad_norm": 4.253145100591827, | |
| "learning_rate": 1.3934255355140735e-06, | |
| "loss": 0.6059, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.6823576745121706, | |
| "grad_norm": 3.6442609114551168, | |
| "learning_rate": 1.3902783487092436e-06, | |
| "loss": 0.437, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 0.6827600080466707, | |
| "grad_norm": 3.6843000132081083, | |
| "learning_rate": 1.387133350581873e-06, | |
| "loss": 0.4741, | |
| "step": 1697 | |
| }, | |
| { | |
| "epoch": 0.6831623415811708, | |
| "grad_norm": 4.082924701084028, | |
| "learning_rate": 1.383990547334767e-06, | |
| "loss": 0.5523, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 0.6835646751156709, | |
| "grad_norm": 4.525193517476986, | |
| "learning_rate": 1.3808499451663998e-06, | |
| "loss": 0.5733, | |
| "step": 1699 | |
| }, | |
| { | |
| "epoch": 0.683967008650171, | |
| "grad_norm": 3.8434172838289142, | |
| "learning_rate": 1.377711550270907e-06, | |
| "loss": 0.4042, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.684369342184671, | |
| "grad_norm": 3.475844585621674, | |
| "learning_rate": 1.3745753688380692e-06, | |
| "loss": 0.4492, | |
| "step": 1701 | |
| }, | |
| { | |
| "epoch": 0.6847716757191712, | |
| "grad_norm": 4.106466165584271, | |
| "learning_rate": 1.3714414070533022e-06, | |
| "loss": 0.4795, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 0.6851740092536713, | |
| "grad_norm": 3.9408796988863477, | |
| "learning_rate": 1.368309671097645e-06, | |
| "loss": 0.4586, | |
| "step": 1703 | |
| }, | |
| { | |
| "epoch": 0.6855763427881714, | |
| "grad_norm": 3.892878635048642, | |
| "learning_rate": 1.3651801671477435e-06, | |
| "loss": 0.5256, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 0.6859786763226715, | |
| "grad_norm": 3.602589673252341, | |
| "learning_rate": 1.3620529013758444e-06, | |
| "loss": 0.4474, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.6863810098571715, | |
| "grad_norm": 4.29050518808401, | |
| "learning_rate": 1.3589278799497802e-06, | |
| "loss": 0.4799, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 0.6867833433916717, | |
| "grad_norm": 4.530188596691838, | |
| "learning_rate": 1.3558051090329536e-06, | |
| "loss": 0.6044, | |
| "step": 1707 | |
| }, | |
| { | |
| "epoch": 0.6871856769261718, | |
| "grad_norm": 3.8842771157381932, | |
| "learning_rate": 1.352684594784333e-06, | |
| "loss": 0.4759, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 0.6875880104606719, | |
| "grad_norm": 4.206785571596244, | |
| "learning_rate": 1.3495663433584327e-06, | |
| "loss": 0.58, | |
| "step": 1709 | |
| }, | |
| { | |
| "epoch": 0.687990343995172, | |
| "grad_norm": 4.648975138578508, | |
| "learning_rate": 1.3464503609053053e-06, | |
| "loss": 0.5508, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.6883926775296721, | |
| "grad_norm": 4.513492173355037, | |
| "learning_rate": 1.3433366535705292e-06, | |
| "loss": 0.6606, | |
| "step": 1711 | |
| }, | |
| { | |
| "epoch": 0.6887950110641722, | |
| "grad_norm": 4.149085872177257, | |
| "learning_rate": 1.3402252274951954e-06, | |
| "loss": 0.5438, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 0.6891973445986723, | |
| "grad_norm": 4.3080582119504935, | |
| "learning_rate": 1.3371160888158935e-06, | |
| "loss": 0.7277, | |
| "step": 1713 | |
| }, | |
| { | |
| "epoch": 0.6895996781331724, | |
| "grad_norm": 4.195944255893043, | |
| "learning_rate": 1.3340092436647045e-06, | |
| "loss": 0.5682, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 0.6900020116676725, | |
| "grad_norm": 4.22717459587227, | |
| "learning_rate": 1.330904698169184e-06, | |
| "loss": 0.5394, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.6904043452021726, | |
| "grad_norm": 3.8328998433470334, | |
| "learning_rate": 1.3278024584523542e-06, | |
| "loss": 0.519, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 0.6908066787366727, | |
| "grad_norm": 3.809809815071977, | |
| "learning_rate": 1.324702530632686e-06, | |
| "loss": 0.4609, | |
| "step": 1717 | |
| }, | |
| { | |
| "epoch": 0.6912090122711728, | |
| "grad_norm": 4.408644772842878, | |
| "learning_rate": 1.3216049208240945e-06, | |
| "loss": 0.5259, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 0.6916113458056729, | |
| "grad_norm": 4.351040955206057, | |
| "learning_rate": 1.3185096351359211e-06, | |
| "loss": 0.531, | |
| "step": 1719 | |
| }, | |
| { | |
| "epoch": 0.692013679340173, | |
| "grad_norm": 3.8862458710646135, | |
| "learning_rate": 1.3154166796729245e-06, | |
| "loss": 0.5521, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.6924160128746731, | |
| "grad_norm": 3.8732946833809283, | |
| "learning_rate": 1.3123260605352647e-06, | |
| "loss": 0.4054, | |
| "step": 1721 | |
| }, | |
| { | |
| "epoch": 0.6928183464091732, | |
| "grad_norm": 4.014263742357025, | |
| "learning_rate": 1.3092377838184984e-06, | |
| "loss": 0.5096, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 0.6932206799436733, | |
| "grad_norm": 3.8457014699738306, | |
| "learning_rate": 1.3061518556135572e-06, | |
| "loss": 0.5361, | |
| "step": 1723 | |
| }, | |
| { | |
| "epoch": 0.6936230134781735, | |
| "grad_norm": 4.700764158732767, | |
| "learning_rate": 1.3030682820067447e-06, | |
| "loss": 0.6363, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 0.6940253470126735, | |
| "grad_norm": 4.056861341126563, | |
| "learning_rate": 1.2999870690797207e-06, | |
| "loss": 0.5499, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.6944276805471736, | |
| "grad_norm": 3.824223111444158, | |
| "learning_rate": 1.2969082229094853e-06, | |
| "loss": 0.5049, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 0.6948300140816737, | |
| "grad_norm": 3.8066989110426332, | |
| "learning_rate": 1.2938317495683744e-06, | |
| "loss": 0.487, | |
| "step": 1727 | |
| }, | |
| { | |
| "epoch": 0.6952323476161738, | |
| "grad_norm": 4.382851529690001, | |
| "learning_rate": 1.2907576551240425e-06, | |
| "loss": 0.5016, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 0.695634681150674, | |
| "grad_norm": 4.2709658357684415, | |
| "learning_rate": 1.2876859456394537e-06, | |
| "loss": 0.5287, | |
| "step": 1729 | |
| }, | |
| { | |
| "epoch": 0.696037014685174, | |
| "grad_norm": 4.10428141385855, | |
| "learning_rate": 1.284616627172865e-06, | |
| "loss": 0.5048, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.6964393482196741, | |
| "grad_norm": 3.9240197132726133, | |
| "learning_rate": 1.281549705777821e-06, | |
| "loss": 0.5834, | |
| "step": 1731 | |
| }, | |
| { | |
| "epoch": 0.6968416817541742, | |
| "grad_norm": 3.917832710676123, | |
| "learning_rate": 1.278485187503137e-06, | |
| "loss": 0.4978, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 0.6972440152886743, | |
| "grad_norm": 3.9886241574028123, | |
| "learning_rate": 1.2754230783928907e-06, | |
| "loss": 0.5046, | |
| "step": 1733 | |
| }, | |
| { | |
| "epoch": 0.6976463488231744, | |
| "grad_norm": 3.8252585006207744, | |
| "learning_rate": 1.2723633844864036e-06, | |
| "loss": 0.4337, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 0.6980486823576745, | |
| "grad_norm": 4.327909530843214, | |
| "learning_rate": 1.2693061118182399e-06, | |
| "loss": 0.5663, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.6984510158921746, | |
| "grad_norm": 3.9729308104472225, | |
| "learning_rate": 1.2662512664181826e-06, | |
| "loss": 0.4753, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 0.6988533494266748, | |
| "grad_norm": 3.4193536839822434, | |
| "learning_rate": 1.2631988543112314e-06, | |
| "loss": 0.3984, | |
| "step": 1737 | |
| }, | |
| { | |
| "epoch": 0.6992556829611748, | |
| "grad_norm": 3.199663843957577, | |
| "learning_rate": 1.2601488815175867e-06, | |
| "loss": 0.3615, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 0.6996580164956749, | |
| "grad_norm": 4.275095481044542, | |
| "learning_rate": 1.257101354052635e-06, | |
| "loss": 0.635, | |
| "step": 1739 | |
| }, | |
| { | |
| "epoch": 0.700060350030175, | |
| "grad_norm": 4.154894699295693, | |
| "learning_rate": 1.2540562779269421e-06, | |
| "loss": 0.468, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.7004626835646751, | |
| "grad_norm": 4.21271106717443, | |
| "learning_rate": 1.2510136591462399e-06, | |
| "loss": 0.557, | |
| "step": 1741 | |
| }, | |
| { | |
| "epoch": 0.7008650170991753, | |
| "grad_norm": 3.9488032178250276, | |
| "learning_rate": 1.2479735037114118e-06, | |
| "loss": 0.4743, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 0.7012673506336753, | |
| "grad_norm": 3.6437407466020453, | |
| "learning_rate": 1.2449358176184848e-06, | |
| "loss": 0.4958, | |
| "step": 1743 | |
| }, | |
| { | |
| "epoch": 0.7016696841681754, | |
| "grad_norm": 4.011291060246906, | |
| "learning_rate": 1.2419006068586125e-06, | |
| "loss": 0.5109, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 0.7020720177026755, | |
| "grad_norm": 3.7545390991024026, | |
| "learning_rate": 1.2388678774180698e-06, | |
| "loss": 0.5269, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.7024743512371756, | |
| "grad_norm": 3.821504931037996, | |
| "learning_rate": 1.2358376352782358e-06, | |
| "loss": 0.4905, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 0.7028766847716758, | |
| "grad_norm": 4.3580981207011655, | |
| "learning_rate": 1.232809886415586e-06, | |
| "loss": 0.4904, | |
| "step": 1747 | |
| }, | |
| { | |
| "epoch": 0.7032790183061758, | |
| "grad_norm": 3.539055820090707, | |
| "learning_rate": 1.2297846368016748e-06, | |
| "loss": 0.4124, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 0.7036813518406759, | |
| "grad_norm": 3.9844890147713254, | |
| "learning_rate": 1.2267618924031317e-06, | |
| "loss": 0.5321, | |
| "step": 1749 | |
| }, | |
| { | |
| "epoch": 0.7040836853751761, | |
| "grad_norm": 4.178112023828336, | |
| "learning_rate": 1.2237416591816412e-06, | |
| "loss": 0.6362, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.7044860189096761, | |
| "grad_norm": 4.0472275399599695, | |
| "learning_rate": 1.220723943093939e-06, | |
| "loss": 0.5101, | |
| "step": 1751 | |
| }, | |
| { | |
| "epoch": 0.7048883524441762, | |
| "grad_norm": 4.312523214514084, | |
| "learning_rate": 1.2177087500917925e-06, | |
| "loss": 0.5277, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 0.7052906859786763, | |
| "grad_norm": 4.133768938615848, | |
| "learning_rate": 1.2146960861219954e-06, | |
| "loss": 0.5271, | |
| "step": 1753 | |
| }, | |
| { | |
| "epoch": 0.7056930195131764, | |
| "grad_norm": 4.430552507921185, | |
| "learning_rate": 1.211685957126353e-06, | |
| "loss": 0.711, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 0.7060953530476766, | |
| "grad_norm": 4.119451270646705, | |
| "learning_rate": 1.2086783690416704e-06, | |
| "loss": 0.4358, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.7064976865821766, | |
| "grad_norm": 4.295693476571627, | |
| "learning_rate": 1.205673327799743e-06, | |
| "loss": 0.5466, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 0.7069000201166767, | |
| "grad_norm": 4.312446284251359, | |
| "learning_rate": 1.2026708393273392e-06, | |
| "loss": 0.5861, | |
| "step": 1757 | |
| }, | |
| { | |
| "epoch": 0.7073023536511768, | |
| "grad_norm": 3.9109516304734733, | |
| "learning_rate": 1.1996709095461967e-06, | |
| "loss": 0.4782, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 0.7077046871856769, | |
| "grad_norm": 4.425022693894618, | |
| "learning_rate": 1.1966735443730052e-06, | |
| "loss": 0.5627, | |
| "step": 1759 | |
| }, | |
| { | |
| "epoch": 0.7081070207201771, | |
| "grad_norm": 4.259706435608531, | |
| "learning_rate": 1.193678749719397e-06, | |
| "loss": 0.5274, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.7085093542546771, | |
| "grad_norm": 4.219902699982978, | |
| "learning_rate": 1.190686531491932e-06, | |
| "loss": 0.5397, | |
| "step": 1761 | |
| }, | |
| { | |
| "epoch": 0.7089116877891772, | |
| "grad_norm": 4.105043115699033, | |
| "learning_rate": 1.1876968955920916e-06, | |
| "loss": 0.4845, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 0.7093140213236774, | |
| "grad_norm": 3.905684008250733, | |
| "learning_rate": 1.1847098479162644e-06, | |
| "loss": 0.5769, | |
| "step": 1763 | |
| }, | |
| { | |
| "epoch": 0.7097163548581774, | |
| "grad_norm": 4.0434341189775616, | |
| "learning_rate": 1.1817253943557308e-06, | |
| "loss": 0.4338, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 0.7101186883926776, | |
| "grad_norm": 4.331648953621162, | |
| "learning_rate": 1.1787435407966593e-06, | |
| "loss": 0.6231, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.7105210219271776, | |
| "grad_norm": 4.225800580764582, | |
| "learning_rate": 1.1757642931200864e-06, | |
| "loss": 0.4946, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 0.7109233554616777, | |
| "grad_norm": 3.832934508222693, | |
| "learning_rate": 1.1727876572019117e-06, | |
| "loss": 0.5319, | |
| "step": 1767 | |
| }, | |
| { | |
| "epoch": 0.7113256889961779, | |
| "grad_norm": 3.994089631167334, | |
| "learning_rate": 1.1698136389128833e-06, | |
| "loss": 0.5005, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 0.7117280225306779, | |
| "grad_norm": 3.8399445750919425, | |
| "learning_rate": 1.166842244118587e-06, | |
| "loss": 0.4726, | |
| "step": 1769 | |
| }, | |
| { | |
| "epoch": 0.712130356065178, | |
| "grad_norm": 4.043691249667768, | |
| "learning_rate": 1.163873478679432e-06, | |
| "loss": 0.4885, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.7125326895996781, | |
| "grad_norm": 4.26586065507147, | |
| "learning_rate": 1.1609073484506441e-06, | |
| "loss": 0.5955, | |
| "step": 1771 | |
| }, | |
| { | |
| "epoch": 0.7129350231341782, | |
| "grad_norm": 3.2723853825243006, | |
| "learning_rate": 1.157943859282251e-06, | |
| "loss": 0.393, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 0.7133373566686784, | |
| "grad_norm": 4.228399556429077, | |
| "learning_rate": 1.1549830170190714e-06, | |
| "loss": 0.6138, | |
| "step": 1773 | |
| }, | |
| { | |
| "epoch": 0.7137396902031784, | |
| "grad_norm": 3.703790105757895, | |
| "learning_rate": 1.152024827500705e-06, | |
| "loss": 0.4952, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 0.7141420237376785, | |
| "grad_norm": 4.037603384099717, | |
| "learning_rate": 1.149069296561516e-06, | |
| "loss": 0.5791, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.7145443572721787, | |
| "grad_norm": 3.958021889837016, | |
| "learning_rate": 1.1461164300306292e-06, | |
| "loss": 0.495, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 0.7149466908066787, | |
| "grad_norm": 4.142430978673458, | |
| "learning_rate": 1.1431662337319107e-06, | |
| "loss": 0.6534, | |
| "step": 1777 | |
| }, | |
| { | |
| "epoch": 0.7153490243411789, | |
| "grad_norm": 4.484413621549814, | |
| "learning_rate": 1.1402187134839643e-06, | |
| "loss": 0.4885, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 0.7157513578756789, | |
| "grad_norm": 3.6695545389300395, | |
| "learning_rate": 1.1372738751001111e-06, | |
| "loss": 0.4446, | |
| "step": 1779 | |
| }, | |
| { | |
| "epoch": 0.716153691410179, | |
| "grad_norm": 3.492829736247777, | |
| "learning_rate": 1.134331724388387e-06, | |
| "loss": 0.5245, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.7165560249446792, | |
| "grad_norm": 3.8848835038152107, | |
| "learning_rate": 1.1313922671515252e-06, | |
| "loss": 0.5042, | |
| "step": 1781 | |
| }, | |
| { | |
| "epoch": 0.7169583584791792, | |
| "grad_norm": 4.4510516838349705, | |
| "learning_rate": 1.128455509186948e-06, | |
| "loss": 0.6898, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 0.7173606920136794, | |
| "grad_norm": 4.381017376532981, | |
| "learning_rate": 1.1255214562867503e-06, | |
| "loss": 0.5289, | |
| "step": 1783 | |
| }, | |
| { | |
| "epoch": 0.7177630255481794, | |
| "grad_norm": 3.5079718266262416, | |
| "learning_rate": 1.122590114237696e-06, | |
| "loss": 0.4724, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 0.7181653590826795, | |
| "grad_norm": 4.292226353007704, | |
| "learning_rate": 1.1196614888212007e-06, | |
| "loss": 0.5796, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.7185676926171797, | |
| "grad_norm": 4.052087753788445, | |
| "learning_rate": 1.1167355858133226e-06, | |
| "loss": 0.4938, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 0.7189700261516797, | |
| "grad_norm": 5.157980356295375, | |
| "learning_rate": 1.1138124109847503e-06, | |
| "loss": 0.5949, | |
| "step": 1787 | |
| }, | |
| { | |
| "epoch": 0.7193723596861799, | |
| "grad_norm": 3.285940613285488, | |
| "learning_rate": 1.1108919701007907e-06, | |
| "loss": 0.3908, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 0.71977469322068, | |
| "grad_norm": 3.5335717637010937, | |
| "learning_rate": 1.1079742689213593e-06, | |
| "loss": 0.5013, | |
| "step": 1789 | |
| }, | |
| { | |
| "epoch": 0.72017702675518, | |
| "grad_norm": 4.450180240936036, | |
| "learning_rate": 1.1050593132009703e-06, | |
| "loss": 0.6329, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.7205793602896802, | |
| "grad_norm": 4.404576812133769, | |
| "learning_rate": 1.1021471086887187e-06, | |
| "loss": 0.5781, | |
| "step": 1791 | |
| }, | |
| { | |
| "epoch": 0.7209816938241802, | |
| "grad_norm": 3.3602741770117177, | |
| "learning_rate": 1.0992376611282776e-06, | |
| "loss": 0.3621, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 0.7213840273586803, | |
| "grad_norm": 3.381272077924684, | |
| "learning_rate": 1.0963309762578795e-06, | |
| "loss": 0.384, | |
| "step": 1793 | |
| }, | |
| { | |
| "epoch": 0.7217863608931805, | |
| "grad_norm": 3.5780173251972025, | |
| "learning_rate": 1.0934270598103094e-06, | |
| "loss": 0.5372, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 0.7221886944276805, | |
| "grad_norm": 3.7738716419210814, | |
| "learning_rate": 1.0905259175128931e-06, | |
| "loss": 0.4336, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.7225910279621807, | |
| "grad_norm": 3.820342940812247, | |
| "learning_rate": 1.0876275550874846e-06, | |
| "loss": 0.5335, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 0.7229933614966807, | |
| "grad_norm": 4.176570559423123, | |
| "learning_rate": 1.0847319782504532e-06, | |
| "loss": 0.5038, | |
| "step": 1797 | |
| }, | |
| { | |
| "epoch": 0.7233956950311808, | |
| "grad_norm": 3.67077010562448, | |
| "learning_rate": 1.0818391927126764e-06, | |
| "loss": 0.4342, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 0.723798028565681, | |
| "grad_norm": 3.9880069700930423, | |
| "learning_rate": 1.078949204179526e-06, | |
| "loss": 0.4953, | |
| "step": 1799 | |
| }, | |
| { | |
| "epoch": 0.724200362100181, | |
| "grad_norm": 4.161334696726195, | |
| "learning_rate": 1.0760620183508582e-06, | |
| "loss": 0.5799, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.7246026956346812, | |
| "grad_norm": 4.126464363948578, | |
| "learning_rate": 1.073177640920999e-06, | |
| "loss": 0.5138, | |
| "step": 1801 | |
| }, | |
| { | |
| "epoch": 0.7250050291691813, | |
| "grad_norm": 4.1895057334300505, | |
| "learning_rate": 1.0702960775787374e-06, | |
| "loss": 0.5121, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 0.7254073627036813, | |
| "grad_norm": 3.884515999748438, | |
| "learning_rate": 1.0674173340073121e-06, | |
| "loss": 0.4891, | |
| "step": 1803 | |
| }, | |
| { | |
| "epoch": 0.7258096962381815, | |
| "grad_norm": 4.286867137892461, | |
| "learning_rate": 1.0645414158844012e-06, | |
| "loss": 0.6118, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 0.7262120297726815, | |
| "grad_norm": 3.8959234040057806, | |
| "learning_rate": 1.0616683288821067e-06, | |
| "loss": 0.5234, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.7266143633071817, | |
| "grad_norm": 4.239978882514847, | |
| "learning_rate": 1.058798078666952e-06, | |
| "loss": 0.5183, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 0.7270166968416818, | |
| "grad_norm": 3.7122065670739235, | |
| "learning_rate": 1.0559306708998607e-06, | |
| "loss": 0.501, | |
| "step": 1807 | |
| }, | |
| { | |
| "epoch": 0.7274190303761818, | |
| "grad_norm": 4.556831231079228, | |
| "learning_rate": 1.0530661112361533e-06, | |
| "loss": 0.5257, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 0.727821363910682, | |
| "grad_norm": 4.308685532929913, | |
| "learning_rate": 1.050204405325534e-06, | |
| "loss": 0.5534, | |
| "step": 1809 | |
| }, | |
| { | |
| "epoch": 0.728223697445182, | |
| "grad_norm": 3.9297951909048368, | |
| "learning_rate": 1.0473455588120738e-06, | |
| "loss": 0.4681, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.7286260309796821, | |
| "grad_norm": 3.98667448491781, | |
| "learning_rate": 1.0444895773342092e-06, | |
| "loss": 0.4785, | |
| "step": 1811 | |
| }, | |
| { | |
| "epoch": 0.7290283645141823, | |
| "grad_norm": 4.2934600586771845, | |
| "learning_rate": 1.0416364665247236e-06, | |
| "loss": 0.5408, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 0.7294306980486823, | |
| "grad_norm": 3.9347493779534, | |
| "learning_rate": 1.0387862320107403e-06, | |
| "loss": 0.5895, | |
| "step": 1813 | |
| }, | |
| { | |
| "epoch": 0.7298330315831825, | |
| "grad_norm": 4.338941092246621, | |
| "learning_rate": 1.0359388794137068e-06, | |
| "loss": 0.5648, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 0.7302353651176826, | |
| "grad_norm": 3.792647800273631, | |
| "learning_rate": 1.0330944143493892e-06, | |
| "loss": 0.5449, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.7306376986521826, | |
| "grad_norm": 3.821766624740813, | |
| "learning_rate": 1.0302528424278585e-06, | |
| "loss": 0.4544, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 0.7310400321866828, | |
| "grad_norm": 4.176558757203708, | |
| "learning_rate": 1.0274141692534784e-06, | |
| "loss": 0.4802, | |
| "step": 1817 | |
| }, | |
| { | |
| "epoch": 0.7314423657211828, | |
| "grad_norm": 3.5626608894002803, | |
| "learning_rate": 1.0245784004248968e-06, | |
| "loss": 0.4273, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 0.731844699255683, | |
| "grad_norm": 3.8719939712343936, | |
| "learning_rate": 1.0217455415350316e-06, | |
| "loss": 0.5409, | |
| "step": 1819 | |
| }, | |
| { | |
| "epoch": 0.7322470327901831, | |
| "grad_norm": 4.537877117506395, | |
| "learning_rate": 1.0189155981710642e-06, | |
| "loss": 0.5705, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.7326493663246831, | |
| "grad_norm": 3.932184586794533, | |
| "learning_rate": 1.0160885759144217e-06, | |
| "loss": 0.4904, | |
| "step": 1821 | |
| }, | |
| { | |
| "epoch": 0.7330516998591833, | |
| "grad_norm": 4.266661778185627, | |
| "learning_rate": 1.0132644803407753e-06, | |
| "loss": 0.4769, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 0.7334540333936833, | |
| "grad_norm": 3.7589693532265827, | |
| "learning_rate": 1.0104433170200188e-06, | |
| "loss": 0.4891, | |
| "step": 1823 | |
| }, | |
| { | |
| "epoch": 0.7338563669281835, | |
| "grad_norm": 3.8562553041075263, | |
| "learning_rate": 1.007625091516266e-06, | |
| "loss": 0.417, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 0.7342587004626836, | |
| "grad_norm": 4.130360177598847, | |
| "learning_rate": 1.0048098093878352e-06, | |
| "loss": 0.5207, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.7346610339971836, | |
| "grad_norm": 4.16513812007776, | |
| "learning_rate": 1.001997476187241e-06, | |
| "loss": 0.5213, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 0.7350633675316838, | |
| "grad_norm": 4.4208377164885055, | |
| "learning_rate": 9.991880974611809e-07, | |
| "loss": 0.6269, | |
| "step": 1827 | |
| }, | |
| { | |
| "epoch": 0.7354657010661839, | |
| "grad_norm": 4.867032713419264, | |
| "learning_rate": 9.96381678750524e-07, | |
| "loss": 0.4808, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 0.735868034600684, | |
| "grad_norm": 3.982545780740484, | |
| "learning_rate": 9.935782255903034e-07, | |
| "loss": 0.5066, | |
| "step": 1829 | |
| }, | |
| { | |
| "epoch": 0.7362703681351841, | |
| "grad_norm": 4.286351965336647, | |
| "learning_rate": 9.907777435097029e-07, | |
| "loss": 0.5151, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.7366727016696841, | |
| "grad_norm": 4.362229906545009, | |
| "learning_rate": 9.879802380320473e-07, | |
| "loss": 0.5936, | |
| "step": 1831 | |
| }, | |
| { | |
| "epoch": 0.7370750352041843, | |
| "grad_norm": 4.660993080328005, | |
| "learning_rate": 9.851857146747873e-07, | |
| "loss": 0.6829, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 0.7374773687386844, | |
| "grad_norm": 4.222997821431192, | |
| "learning_rate": 9.82394178949497e-07, | |
| "loss": 0.514, | |
| "step": 1833 | |
| }, | |
| { | |
| "epoch": 0.7378797022731844, | |
| "grad_norm": 4.369792251301722, | |
| "learning_rate": 9.796056363618533e-07, | |
| "loss": 0.5742, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 0.7382820358076846, | |
| "grad_norm": 4.422672878513299, | |
| "learning_rate": 9.768200924116338e-07, | |
| "loss": 0.568, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.7386843693421846, | |
| "grad_norm": 4.507594407110887, | |
| "learning_rate": 9.740375525926988e-07, | |
| "loss": 0.6017, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 0.7390867028766848, | |
| "grad_norm": 3.8125967279749835, | |
| "learning_rate": 9.71258022392985e-07, | |
| "loss": 0.5959, | |
| "step": 1837 | |
| }, | |
| { | |
| "epoch": 0.7394890364111849, | |
| "grad_norm": 3.7273585439154586, | |
| "learning_rate": 9.684815072944946e-07, | |
| "loss": 0.3825, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 0.7398913699456849, | |
| "grad_norm": 3.738805120372397, | |
| "learning_rate": 9.657080127732807e-07, | |
| "loss": 0.4931, | |
| "step": 1839 | |
| }, | |
| { | |
| "epoch": 0.7402937034801851, | |
| "grad_norm": 3.9828190911127526, | |
| "learning_rate": 9.629375442994418e-07, | |
| "loss": 0.5182, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.7406960370146852, | |
| "grad_norm": 3.9798516908695882, | |
| "learning_rate": 9.601701073371045e-07, | |
| "loss": 0.548, | |
| "step": 1841 | |
| }, | |
| { | |
| "epoch": 0.7410983705491853, | |
| "grad_norm": 3.7199861649210715, | |
| "learning_rate": 9.574057073444195e-07, | |
| "loss": 0.468, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 0.7415007040836854, | |
| "grad_norm": 4.0680484984054335, | |
| "learning_rate": 9.546443497735467e-07, | |
| "loss": 0.5782, | |
| "step": 1843 | |
| }, | |
| { | |
| "epoch": 0.7419030376181854, | |
| "grad_norm": 3.4109362486846115, | |
| "learning_rate": 9.518860400706465e-07, | |
| "loss": 0.4245, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 0.7423053711526856, | |
| "grad_norm": 3.931799967484987, | |
| "learning_rate": 9.491307836758651e-07, | |
| "loss": 0.4863, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.7427077046871857, | |
| "grad_norm": 4.211928044347116, | |
| "learning_rate": 9.463785860233301e-07, | |
| "loss": 0.5459, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 0.7431100382216858, | |
| "grad_norm": 4.0902800868050715, | |
| "learning_rate": 9.436294525411357e-07, | |
| "loss": 0.6375, | |
| "step": 1847 | |
| }, | |
| { | |
| "epoch": 0.7435123717561859, | |
| "grad_norm": 3.524293359481542, | |
| "learning_rate": 9.408833886513303e-07, | |
| "loss": 0.444, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 0.7439147052906859, | |
| "grad_norm": 3.6061388537955827, | |
| "learning_rate": 9.381403997699117e-07, | |
| "loss": 0.5217, | |
| "step": 1849 | |
| }, | |
| { | |
| "epoch": 0.7443170388251861, | |
| "grad_norm": 3.6547603411472838, | |
| "learning_rate": 9.354004913068096e-07, | |
| "loss": 0.5229, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.7447193723596862, | |
| "grad_norm": 4.340731245011316, | |
| "learning_rate": 9.326636686658808e-07, | |
| "loss": 0.5924, | |
| "step": 1851 | |
| }, | |
| { | |
| "epoch": 0.7451217058941862, | |
| "grad_norm": 3.4273270658083166, | |
| "learning_rate": 9.299299372448953e-07, | |
| "loss": 0.4941, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 0.7455240394286864, | |
| "grad_norm": 3.9330296000490668, | |
| "learning_rate": 9.271993024355263e-07, | |
| "loss": 0.5988, | |
| "step": 1853 | |
| }, | |
| { | |
| "epoch": 0.7459263729631865, | |
| "grad_norm": 4.79047981984662, | |
| "learning_rate": 9.244717696233388e-07, | |
| "loss": 0.6193, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 0.7463287064976866, | |
| "grad_norm": 3.6277035412847924, | |
| "learning_rate": 9.217473441877805e-07, | |
| "loss": 0.4905, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.7467310400321867, | |
| "grad_norm": 4.652255454626382, | |
| "learning_rate": 9.19026031502171e-07, | |
| "loss": 0.6231, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 0.7471333735666867, | |
| "grad_norm": 4.139619345656057, | |
| "learning_rate": 9.1630783693369e-07, | |
| "loss": 0.4981, | |
| "step": 1857 | |
| }, | |
| { | |
| "epoch": 0.7475357071011869, | |
| "grad_norm": 5.059383612132635, | |
| "learning_rate": 9.135927658433685e-07, | |
| "loss": 0.7292, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 0.747938040635687, | |
| "grad_norm": 4.664959181536374, | |
| "learning_rate": 9.108808235860744e-07, | |
| "loss": 0.6118, | |
| "step": 1859 | |
| }, | |
| { | |
| "epoch": 0.7483403741701871, | |
| "grad_norm": 4.243108848129753, | |
| "learning_rate": 9.081720155105076e-07, | |
| "loss": 0.5616, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.7487427077046872, | |
| "grad_norm": 3.4693576707426548, | |
| "learning_rate": 9.054663469591862e-07, | |
| "loss": 0.4451, | |
| "step": 1861 | |
| }, | |
| { | |
| "epoch": 0.7491450412391872, | |
| "grad_norm": 4.037761942838199, | |
| "learning_rate": 9.027638232684339e-07, | |
| "loss": 0.5354, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 0.7495473747736874, | |
| "grad_norm": 4.037140323875402, | |
| "learning_rate": 9.00064449768375e-07, | |
| "loss": 0.3946, | |
| "step": 1863 | |
| }, | |
| { | |
| "epoch": 0.7499497083081875, | |
| "grad_norm": 4.420498117292749, | |
| "learning_rate": 8.97368231782918e-07, | |
| "loss": 0.6091, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 0.7503520418426876, | |
| "grad_norm": 4.098542987137017, | |
| "learning_rate": 8.946751746297494e-07, | |
| "loss": 0.6347, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.7507543753771877, | |
| "grad_norm": 3.718783314907544, | |
| "learning_rate": 8.919852836203224e-07, | |
| "loss": 0.5523, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 0.7511567089116878, | |
| "grad_norm": 4.238567081312867, | |
| "learning_rate": 8.892985640598434e-07, | |
| "loss": 0.5716, | |
| "step": 1867 | |
| }, | |
| { | |
| "epoch": 0.7515590424461879, | |
| "grad_norm": 3.878960051531824, | |
| "learning_rate": 8.866150212472657e-07, | |
| "loss": 0.5552, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 0.751961375980688, | |
| "grad_norm": 4.0405823701369155, | |
| "learning_rate": 8.839346604752763e-07, | |
| "loss": 0.5286, | |
| "step": 1869 | |
| }, | |
| { | |
| "epoch": 0.752363709515188, | |
| "grad_norm": 4.251404967808422, | |
| "learning_rate": 8.81257487030287e-07, | |
| "loss": 0.4872, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.7527660430496882, | |
| "grad_norm": 3.6336762556932163, | |
| "learning_rate": 8.785835061924234e-07, | |
| "loss": 0.4301, | |
| "step": 1871 | |
| }, | |
| { | |
| "epoch": 0.7531683765841883, | |
| "grad_norm": 4.324873818573403, | |
| "learning_rate": 8.759127232355128e-07, | |
| "loss": 0.5405, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 0.7535707101186884, | |
| "grad_norm": 4.42447965555661, | |
| "learning_rate": 8.732451434270767e-07, | |
| "loss": 0.5209, | |
| "step": 1873 | |
| }, | |
| { | |
| "epoch": 0.7539730436531885, | |
| "grad_norm": 4.251290292687751, | |
| "learning_rate": 8.705807720283199e-07, | |
| "loss": 0.5951, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 0.7543753771876885, | |
| "grad_norm": 4.171584555966765, | |
| "learning_rate": 8.679196142941173e-07, | |
| "loss": 0.6086, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.7547777107221887, | |
| "grad_norm": 4.0958958819445614, | |
| "learning_rate": 8.652616754730075e-07, | |
| "loss": 0.5693, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 0.7551800442566888, | |
| "grad_norm": 3.683062028222347, | |
| "learning_rate": 8.626069608071785e-07, | |
| "loss": 0.4584, | |
| "step": 1877 | |
| }, | |
| { | |
| "epoch": 0.7555823777911889, | |
| "grad_norm": 3.9012710111822924, | |
| "learning_rate": 8.599554755324613e-07, | |
| "loss": 0.4611, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 0.755984711325689, | |
| "grad_norm": 3.5489182194802704, | |
| "learning_rate": 8.573072248783165e-07, | |
| "loss": 0.5083, | |
| "step": 1879 | |
| }, | |
| { | |
| "epoch": 0.7563870448601892, | |
| "grad_norm": 3.9884722155489225, | |
| "learning_rate": 8.546622140678265e-07, | |
| "loss": 0.5094, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.7567893783946892, | |
| "grad_norm": 4.328049459143733, | |
| "learning_rate": 8.520204483176814e-07, | |
| "loss": 0.3656, | |
| "step": 1881 | |
| }, | |
| { | |
| "epoch": 0.7571917119291893, | |
| "grad_norm": 3.9339309591347087, | |
| "learning_rate": 8.493819328381731e-07, | |
| "loss": 0.5317, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 0.7575940454636894, | |
| "grad_norm": 4.2270154261756385, | |
| "learning_rate": 8.467466728331828e-07, | |
| "loss": 0.4588, | |
| "step": 1883 | |
| }, | |
| { | |
| "epoch": 0.7579963789981895, | |
| "grad_norm": 3.303945186981914, | |
| "learning_rate": 8.441146735001715e-07, | |
| "loss": 0.4649, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 0.7583987125326896, | |
| "grad_norm": 4.054188026080457, | |
| "learning_rate": 8.414859400301667e-07, | |
| "loss": 0.5645, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.7588010460671897, | |
| "grad_norm": 4.17992148808867, | |
| "learning_rate": 8.388604776077575e-07, | |
| "loss": 0.5204, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 0.7592033796016898, | |
| "grad_norm": 4.428173356861454, | |
| "learning_rate": 8.362382914110804e-07, | |
| "loss": 0.6007, | |
| "step": 1887 | |
| }, | |
| { | |
| "epoch": 0.7596057131361899, | |
| "grad_norm": 4.255468940795954, | |
| "learning_rate": 8.336193866118117e-07, | |
| "loss": 0.5153, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 0.76000804667069, | |
| "grad_norm": 4.025791541260584, | |
| "learning_rate": 8.310037683751527e-07, | |
| "loss": 0.4859, | |
| "step": 1889 | |
| }, | |
| { | |
| "epoch": 0.7604103802051901, | |
| "grad_norm": 3.8573782279099533, | |
| "learning_rate": 8.283914418598263e-07, | |
| "loss": 0.5058, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.7608127137396902, | |
| "grad_norm": 3.332235385366874, | |
| "learning_rate": 8.257824122180602e-07, | |
| "loss": 0.3737, | |
| "step": 1891 | |
| }, | |
| { | |
| "epoch": 0.7612150472741903, | |
| "grad_norm": 3.394561752123056, | |
| "learning_rate": 8.231766845955818e-07, | |
| "loss": 0.4197, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 0.7616173808086905, | |
| "grad_norm": 3.3171785759316443, | |
| "learning_rate": 8.205742641316064e-07, | |
| "loss": 0.417, | |
| "step": 1893 | |
| }, | |
| { | |
| "epoch": 0.7620197143431905, | |
| "grad_norm": 4.487532016958629, | |
| "learning_rate": 8.179751559588234e-07, | |
| "loss": 0.5663, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 0.7624220478776906, | |
| "grad_norm": 4.546271967177341, | |
| "learning_rate": 8.153793652033931e-07, | |
| "loss": 0.5597, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.7628243814121907, | |
| "grad_norm": 3.914164846084329, | |
| "learning_rate": 8.12786896984931e-07, | |
| "loss": 0.4677, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 0.7632267149466908, | |
| "grad_norm": 3.852819616304247, | |
| "learning_rate": 8.101977564165011e-07, | |
| "loss": 0.503, | |
| "step": 1897 | |
| }, | |
| { | |
| "epoch": 0.763629048481191, | |
| "grad_norm": 4.067929177534588, | |
| "learning_rate": 8.076119486046019e-07, | |
| "loss": 0.5396, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 0.764031382015691, | |
| "grad_norm": 4.010571344025329, | |
| "learning_rate": 8.050294786491611e-07, | |
| "loss": 0.4829, | |
| "step": 1899 | |
| }, | |
| { | |
| "epoch": 0.7644337155501911, | |
| "grad_norm": 4.199207618578668, | |
| "learning_rate": 8.024503516435222e-07, | |
| "loss": 0.6123, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.7648360490846912, | |
| "grad_norm": 4.313823267969129, | |
| "learning_rate": 7.998745726744358e-07, | |
| "loss": 0.5597, | |
| "step": 1901 | |
| }, | |
| { | |
| "epoch": 0.7652383826191913, | |
| "grad_norm": 3.7661528466618956, | |
| "learning_rate": 7.973021468220502e-07, | |
| "loss": 0.5036, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 0.7656407161536914, | |
| "grad_norm": 4.017310363498917, | |
| "learning_rate": 7.947330791598973e-07, | |
| "loss": 0.5435, | |
| "step": 1903 | |
| }, | |
| { | |
| "epoch": 0.7660430496881915, | |
| "grad_norm": 4.447099011053462, | |
| "learning_rate": 7.921673747548895e-07, | |
| "loss": 0.4892, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 0.7664453832226916, | |
| "grad_norm": 4.1491367299305555, | |
| "learning_rate": 7.896050386673026e-07, | |
| "loss": 0.5234, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.7668477167571918, | |
| "grad_norm": 4.640959967611087, | |
| "learning_rate": 7.870460759507722e-07, | |
| "loss": 0.5619, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 0.7672500502916918, | |
| "grad_norm": 4.207681163367058, | |
| "learning_rate": 7.844904916522784e-07, | |
| "loss": 0.6313, | |
| "step": 1907 | |
| }, | |
| { | |
| "epoch": 0.7676523838261919, | |
| "grad_norm": 4.624725241802207, | |
| "learning_rate": 7.819382908121386e-07, | |
| "loss": 0.6169, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 0.768054717360692, | |
| "grad_norm": 3.891844454436063, | |
| "learning_rate": 7.793894784639986e-07, | |
| "loss": 0.4932, | |
| "step": 1909 | |
| }, | |
| { | |
| "epoch": 0.7684570508951921, | |
| "grad_norm": 3.6005349724480573, | |
| "learning_rate": 7.768440596348192e-07, | |
| "loss": 0.5835, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.7688593844296923, | |
| "grad_norm": 4.073804566700674, | |
| "learning_rate": 7.743020393448702e-07, | |
| "loss": 0.4882, | |
| "step": 1911 | |
| }, | |
| { | |
| "epoch": 0.7692617179641923, | |
| "grad_norm": 3.959969867245135, | |
| "learning_rate": 7.717634226077156e-07, | |
| "loss": 0.4945, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 0.7696640514986924, | |
| "grad_norm": 3.599166244946097, | |
| "learning_rate": 7.692282144302094e-07, | |
| "loss": 0.3889, | |
| "step": 1913 | |
| }, | |
| { | |
| "epoch": 0.7700663850331925, | |
| "grad_norm": 3.539375774753211, | |
| "learning_rate": 7.666964198124819e-07, | |
| "loss": 0.523, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 0.7704687185676926, | |
| "grad_norm": 3.912187073942566, | |
| "learning_rate": 7.64168043747932e-07, | |
| "loss": 0.5567, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.7708710521021928, | |
| "grad_norm": 4.034008046236846, | |
| "learning_rate": 7.616430912232137e-07, | |
| "loss": 0.5219, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 0.7712733856366928, | |
| "grad_norm": 4.074005253663992, | |
| "learning_rate": 7.59121567218232e-07, | |
| "loss": 0.5302, | |
| "step": 1917 | |
| }, | |
| { | |
| "epoch": 0.7716757191711929, | |
| "grad_norm": 3.4776790939949347, | |
| "learning_rate": 7.566034767061265e-07, | |
| "loss": 0.2992, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 0.7720780527056931, | |
| "grad_norm": 3.755741404325393, | |
| "learning_rate": 7.540888246532688e-07, | |
| "loss": 0.4337, | |
| "step": 1919 | |
| }, | |
| { | |
| "epoch": 0.7724803862401931, | |
| "grad_norm": 3.854464881913325, | |
| "learning_rate": 7.515776160192454e-07, | |
| "loss": 0.5286, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.7728827197746932, | |
| "grad_norm": 3.74441109291879, | |
| "learning_rate": 7.490698557568535e-07, | |
| "loss": 0.5449, | |
| "step": 1921 | |
| }, | |
| { | |
| "epoch": 0.7732850533091933, | |
| "grad_norm": 4.570844263947877, | |
| "learning_rate": 7.465655488120891e-07, | |
| "loss": 0.6153, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 0.7736873868436934, | |
| "grad_norm": 3.7650085288496613, | |
| "learning_rate": 7.440647001241363e-07, | |
| "loss": 0.5166, | |
| "step": 1923 | |
| }, | |
| { | |
| "epoch": 0.7740897203781936, | |
| "grad_norm": 3.6719483984817236, | |
| "learning_rate": 7.415673146253604e-07, | |
| "loss": 0.4842, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 0.7744920539126936, | |
| "grad_norm": 4.057471839423865, | |
| "learning_rate": 7.390733972412933e-07, | |
| "loss": 0.6075, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.7748943874471937, | |
| "grad_norm": 4.4626847789976924, | |
| "learning_rate": 7.365829528906293e-07, | |
| "loss": 0.5292, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 0.7752967209816938, | |
| "grad_norm": 3.76677011259477, | |
| "learning_rate": 7.340959864852124e-07, | |
| "loss": 0.405, | |
| "step": 1927 | |
| }, | |
| { | |
| "epoch": 0.7756990545161939, | |
| "grad_norm": 3.8469846124186082, | |
| "learning_rate": 7.316125029300275e-07, | |
| "loss": 0.4827, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 0.7761013880506941, | |
| "grad_norm": 3.8371445392089827, | |
| "learning_rate": 7.291325071231883e-07, | |
| "loss": 0.5134, | |
| "step": 1929 | |
| }, | |
| { | |
| "epoch": 0.7765037215851941, | |
| "grad_norm": 3.8383451450589887, | |
| "learning_rate": 7.26656003955932e-07, | |
| "loss": 0.4061, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.7769060551196942, | |
| "grad_norm": 4.037666657124557, | |
| "learning_rate": 7.241829983126075e-07, | |
| "loss": 0.4976, | |
| "step": 1931 | |
| }, | |
| { | |
| "epoch": 0.7773083886541944, | |
| "grad_norm": 3.980302342869356, | |
| "learning_rate": 7.217134950706625e-07, | |
| "loss": 0.5149, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 0.7777107221886944, | |
| "grad_norm": 3.7463100803001277, | |
| "learning_rate": 7.192474991006416e-07, | |
| "loss": 0.459, | |
| "step": 1933 | |
| }, | |
| { | |
| "epoch": 0.7781130557231946, | |
| "grad_norm": 3.9126255555142597, | |
| "learning_rate": 7.167850152661673e-07, | |
| "loss": 0.5537, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 0.7785153892576946, | |
| "grad_norm": 4.306777769767276, | |
| "learning_rate": 7.143260484239386e-07, | |
| "loss": 0.4997, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.7789177227921947, | |
| "grad_norm": 4.472035583740821, | |
| "learning_rate": 7.118706034237172e-07, | |
| "loss": 0.6373, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 0.7793200563266949, | |
| "grad_norm": 3.8082808763272356, | |
| "learning_rate": 7.094186851083187e-07, | |
| "loss": 0.5713, | |
| "step": 1937 | |
| }, | |
| { | |
| "epoch": 0.7797223898611949, | |
| "grad_norm": 3.907355998411772, | |
| "learning_rate": 7.069702983136023e-07, | |
| "loss": 0.4916, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 0.780124723395695, | |
| "grad_norm": 3.4404115391335623, | |
| "learning_rate": 7.045254478684626e-07, | |
| "loss": 0.4552, | |
| "step": 1939 | |
| }, | |
| { | |
| "epoch": 0.7805270569301951, | |
| "grad_norm": 4.072479874224982, | |
| "learning_rate": 7.020841385948199e-07, | |
| "loss": 0.5061, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.7809293904646952, | |
| "grad_norm": 5.014496338999608, | |
| "learning_rate": 6.996463753076102e-07, | |
| "loss": 0.5401, | |
| "step": 1941 | |
| }, | |
| { | |
| "epoch": 0.7813317239991954, | |
| "grad_norm": 4.5982221422710134, | |
| "learning_rate": 6.972121628147766e-07, | |
| "loss": 0.6385, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 0.7817340575336954, | |
| "grad_norm": 3.866309336941733, | |
| "learning_rate": 6.94781505917256e-07, | |
| "loss": 0.5007, | |
| "step": 1943 | |
| }, | |
| { | |
| "epoch": 0.7821363910681955, | |
| "grad_norm": 4.000641087223532, | |
| "learning_rate": 6.923544094089762e-07, | |
| "loss": 0.4097, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 0.7825387246026957, | |
| "grad_norm": 4.183627371809607, | |
| "learning_rate": 6.899308780768423e-07, | |
| "loss": 0.4986, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.7829410581371957, | |
| "grad_norm": 3.834784873841407, | |
| "learning_rate": 6.875109167007255e-07, | |
| "loss": 0.4719, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 0.7833433916716959, | |
| "grad_norm": 4.624045837237235, | |
| "learning_rate": 6.85094530053459e-07, | |
| "loss": 0.5742, | |
| "step": 1947 | |
| }, | |
| { | |
| "epoch": 0.7837457252061959, | |
| "grad_norm": 4.968557042198568, | |
| "learning_rate": 6.826817229008232e-07, | |
| "loss": 0.6943, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 0.784148058740696, | |
| "grad_norm": 3.798467897259186, | |
| "learning_rate": 6.802725000015406e-07, | |
| "loss": 0.5297, | |
| "step": 1949 | |
| }, | |
| { | |
| "epoch": 0.7845503922751962, | |
| "grad_norm": 3.974644659610978, | |
| "learning_rate": 6.778668661072643e-07, | |
| "loss": 0.4803, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.7849527258096962, | |
| "grad_norm": 4.048333500653427, | |
| "learning_rate": 6.754648259625673e-07, | |
| "loss": 0.5941, | |
| "step": 1951 | |
| }, | |
| { | |
| "epoch": 0.7853550593441964, | |
| "grad_norm": 4.3035958811807875, | |
| "learning_rate": 6.730663843049367e-07, | |
| "loss": 0.5955, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 0.7857573928786964, | |
| "grad_norm": 3.6544635394961764, | |
| "learning_rate": 6.706715458647615e-07, | |
| "loss": 0.4851, | |
| "step": 1953 | |
| }, | |
| { | |
| "epoch": 0.7861597264131965, | |
| "grad_norm": 4.218917868283341, | |
| "learning_rate": 6.682803153653241e-07, | |
| "loss": 0.6081, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 0.7865620599476967, | |
| "grad_norm": 3.617134711107577, | |
| "learning_rate": 6.658926975227923e-07, | |
| "loss": 0.4414, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.7869643934821967, | |
| "grad_norm": 4.041925903109421, | |
| "learning_rate": 6.635086970462057e-07, | |
| "loss": 0.4589, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 0.7873667270166969, | |
| "grad_norm": 3.9272206421591944, | |
| "learning_rate": 6.611283186374723e-07, | |
| "loss": 0.4697, | |
| "step": 1957 | |
| }, | |
| { | |
| "epoch": 0.7877690605511969, | |
| "grad_norm": 4.242979124153208, | |
| "learning_rate": 6.587515669913558e-07, | |
| "loss": 0.4185, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 0.788171394085697, | |
| "grad_norm": 3.38359591341345, | |
| "learning_rate": 6.563784467954668e-07, | |
| "loss": 0.3828, | |
| "step": 1959 | |
| }, | |
| { | |
| "epoch": 0.7885737276201972, | |
| "grad_norm": 4.348954859955942, | |
| "learning_rate": 6.540089627302523e-07, | |
| "loss": 0.5046, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.7889760611546972, | |
| "grad_norm": 4.120739882383607, | |
| "learning_rate": 6.516431194689907e-07, | |
| "loss": 0.5227, | |
| "step": 1961 | |
| }, | |
| { | |
| "epoch": 0.7893783946891973, | |
| "grad_norm": 3.6740741354898305, | |
| "learning_rate": 6.492809216777762e-07, | |
| "loss": 0.4134, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 0.7897807282236975, | |
| "grad_norm": 3.690318810926642, | |
| "learning_rate": 6.469223740155159e-07, | |
| "loss": 0.4901, | |
| "step": 1963 | |
| }, | |
| { | |
| "epoch": 0.7901830617581975, | |
| "grad_norm": 4.2381781760809485, | |
| "learning_rate": 6.445674811339175e-07, | |
| "loss": 0.4225, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 0.7905853952926977, | |
| "grad_norm": 4.106368765680081, | |
| "learning_rate": 6.422162476774788e-07, | |
| "loss": 0.5645, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.7909877288271977, | |
| "grad_norm": 3.706517698008605, | |
| "learning_rate": 6.398686782834815e-07, | |
| "loss": 0.4995, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 0.7913900623616978, | |
| "grad_norm": 4.432403407342872, | |
| "learning_rate": 6.375247775819809e-07, | |
| "loss": 0.5127, | |
| "step": 1967 | |
| }, | |
| { | |
| "epoch": 0.791792395896198, | |
| "grad_norm": 4.022769337353194, | |
| "learning_rate": 6.351845501957971e-07, | |
| "loss": 0.4525, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 0.792194729430698, | |
| "grad_norm": 4.43806071523494, | |
| "learning_rate": 6.328480007405027e-07, | |
| "loss": 0.5067, | |
| "step": 1969 | |
| }, | |
| { | |
| "epoch": 0.7925970629651982, | |
| "grad_norm": 3.7265320273813027, | |
| "learning_rate": 6.305151338244192e-07, | |
| "loss": 0.4277, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.7929993964996982, | |
| "grad_norm": 4.06221248413846, | |
| "learning_rate": 6.281859540486043e-07, | |
| "loss": 0.6297, | |
| "step": 1971 | |
| }, | |
| { | |
| "epoch": 0.7934017300341983, | |
| "grad_norm": 4.391919257148749, | |
| "learning_rate": 6.258604660068443e-07, | |
| "loss": 0.4994, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 0.7938040635686985, | |
| "grad_norm": 4.252838693305146, | |
| "learning_rate": 6.23538674285642e-07, | |
| "loss": 0.5949, | |
| "step": 1973 | |
| }, | |
| { | |
| "epoch": 0.7942063971031985, | |
| "grad_norm": 4.325947652020019, | |
| "learning_rate": 6.212205834642132e-07, | |
| "loss": 0.5428, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 0.7946087306376987, | |
| "grad_norm": 4.185176650642697, | |
| "learning_rate": 6.189061981144715e-07, | |
| "loss": 0.5732, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.7950110641721988, | |
| "grad_norm": 3.2994931408989627, | |
| "learning_rate": 6.165955228010242e-07, | |
| "loss": 0.37, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 0.7954133977066988, | |
| "grad_norm": 3.9581400842405277, | |
| "learning_rate": 6.14288562081162e-07, | |
| "loss": 0.6003, | |
| "step": 1977 | |
| }, | |
| { | |
| "epoch": 0.795815731241199, | |
| "grad_norm": 3.855839195313672, | |
| "learning_rate": 6.11985320504847e-07, | |
| "loss": 0.5755, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 0.796218064775699, | |
| "grad_norm": 4.28483670710571, | |
| "learning_rate": 6.096858026147079e-07, | |
| "loss": 0.5674, | |
| "step": 1979 | |
| }, | |
| { | |
| "epoch": 0.7966203983101992, | |
| "grad_norm": 3.946092310322622, | |
| "learning_rate": 6.073900129460286e-07, | |
| "loss": 0.4429, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.7970227318446993, | |
| "grad_norm": 3.9442626812628725, | |
| "learning_rate": 6.050979560267414e-07, | |
| "loss": 0.4953, | |
| "step": 1981 | |
| }, | |
| { | |
| "epoch": 0.7974250653791993, | |
| "grad_norm": 4.023203847345498, | |
| "learning_rate": 6.028096363774136e-07, | |
| "loss": 0.5437, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 0.7978273989136995, | |
| "grad_norm": 4.5536613551126335, | |
| "learning_rate": 6.005250585112437e-07, | |
| "loss": 0.6177, | |
| "step": 1983 | |
| }, | |
| { | |
| "epoch": 0.7982297324481995, | |
| "grad_norm": 3.8312030174786313, | |
| "learning_rate": 5.982442269340505e-07, | |
| "loss": 0.4801, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 0.7986320659826996, | |
| "grad_norm": 3.8614656531277896, | |
| "learning_rate": 5.959671461442634e-07, | |
| "loss": 0.4395, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.7990343995171998, | |
| "grad_norm": 4.191209006767766, | |
| "learning_rate": 5.936938206329148e-07, | |
| "loss": 0.4913, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 0.7994367330516998, | |
| "grad_norm": 4.201209209604027, | |
| "learning_rate": 5.914242548836291e-07, | |
| "loss": 0.567, | |
| "step": 1987 | |
| }, | |
| { | |
| "epoch": 0.7998390665862, | |
| "grad_norm": 4.711191490796992, | |
| "learning_rate": 5.891584533726177e-07, | |
| "loss": 0.6445, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 0.8002414001207001, | |
| "grad_norm": 4.050282830317415, | |
| "learning_rate": 5.868964205686653e-07, | |
| "loss": 0.4854, | |
| "step": 1989 | |
| }, | |
| { | |
| "epoch": 0.8006437336552001, | |
| "grad_norm": 3.4477078683288838, | |
| "learning_rate": 5.84638160933127e-07, | |
| "loss": 0.4578, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.8010460671897003, | |
| "grad_norm": 3.977635083967608, | |
| "learning_rate": 5.82383678919912e-07, | |
| "loss": 0.4966, | |
| "step": 1991 | |
| }, | |
| { | |
| "epoch": 0.8014484007242003, | |
| "grad_norm": 4.254984216161458, | |
| "learning_rate": 5.801329789754826e-07, | |
| "loss": 0.5101, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 0.8018507342587005, | |
| "grad_norm": 4.1472621686996005, | |
| "learning_rate": 5.778860655388399e-07, | |
| "loss": 0.6366, | |
| "step": 1993 | |
| }, | |
| { | |
| "epoch": 0.8022530677932006, | |
| "grad_norm": 4.0712688803226715, | |
| "learning_rate": 5.75642943041518e-07, | |
| "loss": 0.4908, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 0.8026554013277006, | |
| "grad_norm": 4.69998065888643, | |
| "learning_rate": 5.734036159075743e-07, | |
| "loss": 0.6769, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.8030577348622008, | |
| "grad_norm": 3.5455182945935144, | |
| "learning_rate": 5.711680885535784e-07, | |
| "loss": 0.4177, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 0.8034600683967008, | |
| "grad_norm": 3.853566513480482, | |
| "learning_rate": 5.689363653886085e-07, | |
| "loss": 0.4434, | |
| "step": 1997 | |
| }, | |
| { | |
| "epoch": 0.803862401931201, | |
| "grad_norm": 4.050333441948282, | |
| "learning_rate": 5.667084508142386e-07, | |
| "loss": 0.4822, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 0.8042647354657011, | |
| "grad_norm": 4.442980318720399, | |
| "learning_rate": 5.644843492245322e-07, | |
| "loss": 0.4872, | |
| "step": 1999 | |
| }, | |
| { | |
| "epoch": 0.8046670690002011, | |
| "grad_norm": 4.457653531733048, | |
| "learning_rate": 5.622640650060301e-07, | |
| "loss": 0.6444, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8050694025347013, | |
| "grad_norm": 3.921778521793536, | |
| "learning_rate": 5.600476025377463e-07, | |
| "loss": 0.5391, | |
| "step": 2001 | |
| }, | |
| { | |
| "epoch": 0.8054717360692014, | |
| "grad_norm": 3.8081912666035755, | |
| "learning_rate": 5.578349661911578e-07, | |
| "loss": 0.4802, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 0.8058740696037014, | |
| "grad_norm": 4.001868301091059, | |
| "learning_rate": 5.556261603301924e-07, | |
| "loss": 0.5459, | |
| "step": 2003 | |
| }, | |
| { | |
| "epoch": 0.8062764031382016, | |
| "grad_norm": 3.847308434546458, | |
| "learning_rate": 5.534211893112268e-07, | |
| "loss": 0.4606, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 0.8066787366727016, | |
| "grad_norm": 4.15027221954597, | |
| "learning_rate": 5.512200574830712e-07, | |
| "loss": 0.5705, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.8070810702072018, | |
| "grad_norm": 3.8098780638264027, | |
| "learning_rate": 5.490227691869656e-07, | |
| "loss": 0.491, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 0.8074834037417019, | |
| "grad_norm": 3.9362205164209616, | |
| "learning_rate": 5.468293287565696e-07, | |
| "loss": 0.4342, | |
| "step": 2007 | |
| }, | |
| { | |
| "epoch": 0.8078857372762019, | |
| "grad_norm": 3.621695424225381, | |
| "learning_rate": 5.446397405179534e-07, | |
| "loss": 0.4943, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 0.8082880708107021, | |
| "grad_norm": 4.894139218776488, | |
| "learning_rate": 5.424540087895888e-07, | |
| "loss": 0.7108, | |
| "step": 2009 | |
| }, | |
| { | |
| "epoch": 0.8086904043452021, | |
| "grad_norm": 4.43306730596569, | |
| "learning_rate": 5.402721378823422e-07, | |
| "loss": 0.5759, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.8090927378797023, | |
| "grad_norm": 4.303012456014276, | |
| "learning_rate": 5.380941320994659e-07, | |
| "loss": 0.575, | |
| "step": 2011 | |
| }, | |
| { | |
| "epoch": 0.8094950714142024, | |
| "grad_norm": 4.313613214512251, | |
| "learning_rate": 5.35919995736589e-07, | |
| "loss": 0.4761, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 0.8098974049487024, | |
| "grad_norm": 3.8521996380430394, | |
| "learning_rate": 5.337497330817074e-07, | |
| "loss": 0.5352, | |
| "step": 2013 | |
| }, | |
| { | |
| "epoch": 0.8102997384832026, | |
| "grad_norm": 4.528703112399277, | |
| "learning_rate": 5.315833484151786e-07, | |
| "loss": 0.5959, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 0.8107020720177027, | |
| "grad_norm": 3.878303004429545, | |
| "learning_rate": 5.294208460097125e-07, | |
| "loss": 0.4815, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.8111044055522028, | |
| "grad_norm": 4.490283907824032, | |
| "learning_rate": 5.272622301303587e-07, | |
| "loss": 0.6368, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 0.8115067390867029, | |
| "grad_norm": 4.279104111581364, | |
| "learning_rate": 5.251075050345056e-07, | |
| "loss": 0.5216, | |
| "step": 2017 | |
| }, | |
| { | |
| "epoch": 0.8119090726212029, | |
| "grad_norm": 3.6608651815923086, | |
| "learning_rate": 5.229566749718645e-07, | |
| "loss": 0.4809, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 0.8123114061557031, | |
| "grad_norm": 4.666782295826111, | |
| "learning_rate": 5.208097441844668e-07, | |
| "loss": 0.504, | |
| "step": 2019 | |
| }, | |
| { | |
| "epoch": 0.8127137396902032, | |
| "grad_norm": 3.84925143665575, | |
| "learning_rate": 5.186667169066523e-07, | |
| "loss": 0.5423, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.8131160732247033, | |
| "grad_norm": 3.578156563304788, | |
| "learning_rate": 5.165275973650633e-07, | |
| "loss": 0.3936, | |
| "step": 2021 | |
| }, | |
| { | |
| "epoch": 0.8135184067592034, | |
| "grad_norm": 4.074315611637044, | |
| "learning_rate": 5.143923897786329e-07, | |
| "loss": 0.6302, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 0.8139207402937034, | |
| "grad_norm": 4.085845386699454, | |
| "learning_rate": 5.122610983585808e-07, | |
| "loss": 0.5789, | |
| "step": 2023 | |
| }, | |
| { | |
| "epoch": 0.8143230738282036, | |
| "grad_norm": 4.000394354287652, | |
| "learning_rate": 5.101337273084017e-07, | |
| "loss": 0.4229, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 0.8147254073627037, | |
| "grad_norm": 3.835079915243062, | |
| "learning_rate": 5.080102808238585e-07, | |
| "loss": 0.5005, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.8151277408972037, | |
| "grad_norm": 3.590556700281588, | |
| "learning_rate": 5.058907630929749e-07, | |
| "loss": 0.4452, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 0.8155300744317039, | |
| "grad_norm": 4.254640813185372, | |
| "learning_rate": 5.037751782960234e-07, | |
| "loss": 0.6328, | |
| "step": 2027 | |
| }, | |
| { | |
| "epoch": 0.815932407966204, | |
| "grad_norm": 4.290085466017056, | |
| "learning_rate": 5.016635306055223e-07, | |
| "loss": 0.5785, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 0.8163347415007041, | |
| "grad_norm": 4.159416054122212, | |
| "learning_rate": 4.995558241862242e-07, | |
| "loss": 0.5129, | |
| "step": 2029 | |
| }, | |
| { | |
| "epoch": 0.8167370750352042, | |
| "grad_norm": 3.7061504996754153, | |
| "learning_rate": 4.974520631951069e-07, | |
| "loss": 0.5305, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.8171394085697042, | |
| "grad_norm": 4.178397491082031, | |
| "learning_rate": 4.953522517813689e-07, | |
| "loss": 0.5855, | |
| "step": 2031 | |
| }, | |
| { | |
| "epoch": 0.8175417421042044, | |
| "grad_norm": 4.154495627484645, | |
| "learning_rate": 4.932563940864168e-07, | |
| "loss": 0.4164, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 0.8179440756387045, | |
| "grad_norm": 4.283562955009001, | |
| "learning_rate": 4.911644942438614e-07, | |
| "loss": 0.4942, | |
| "step": 2033 | |
| }, | |
| { | |
| "epoch": 0.8183464091732046, | |
| "grad_norm": 3.307871102947408, | |
| "learning_rate": 4.890765563795072e-07, | |
| "loss": 0.4552, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 0.8187487427077047, | |
| "grad_norm": 4.15735416688211, | |
| "learning_rate": 4.86992584611343e-07, | |
| "loss": 0.5797, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.8191510762422047, | |
| "grad_norm": 4.111111195260256, | |
| "learning_rate": 4.849125830495366e-07, | |
| "loss": 0.5776, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 0.8195534097767049, | |
| "grad_norm": 3.534887324822181, | |
| "learning_rate": 4.828365557964257e-07, | |
| "loss": 0.4913, | |
| "step": 2037 | |
| }, | |
| { | |
| "epoch": 0.819955743311205, | |
| "grad_norm": 3.8702017938683606, | |
| "learning_rate": 4.807645069465095e-07, | |
| "loss": 0.4638, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 0.820358076845705, | |
| "grad_norm": 4.105810885704208, | |
| "learning_rate": 4.786964405864403e-07, | |
| "loss": 0.51, | |
| "step": 2039 | |
| }, | |
| { | |
| "epoch": 0.8207604103802052, | |
| "grad_norm": 3.7721413153025183, | |
| "learning_rate": 4.766323607950149e-07, | |
| "loss": 0.5327, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.8211627439147053, | |
| "grad_norm": 4.604383899796593, | |
| "learning_rate": 4.7457227164316914e-07, | |
| "loss": 0.6425, | |
| "step": 2041 | |
| }, | |
| { | |
| "epoch": 0.8215650774492054, | |
| "grad_norm": 4.7452724983070595, | |
| "learning_rate": 4.725161771939679e-07, | |
| "loss": 0.5968, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 0.8219674109837055, | |
| "grad_norm": 4.13939509952924, | |
| "learning_rate": 4.704640815025971e-07, | |
| "loss": 0.5454, | |
| "step": 2043 | |
| }, | |
| { | |
| "epoch": 0.8223697445182055, | |
| "grad_norm": 4.07811911040908, | |
| "learning_rate": 4.684159886163553e-07, | |
| "loss": 0.4846, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 0.8227720780527057, | |
| "grad_norm": 4.223857527886817, | |
| "learning_rate": 4.663719025746477e-07, | |
| "loss": 0.548, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.8231744115872058, | |
| "grad_norm": 4.055398221449843, | |
| "learning_rate": 4.643318274089756e-07, | |
| "loss": 0.4762, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 0.8235767451217059, | |
| "grad_norm": 3.93292252326897, | |
| "learning_rate": 4.6229576714293067e-07, | |
| "loss": 0.5506, | |
| "step": 2047 | |
| }, | |
| { | |
| "epoch": 0.823979078656206, | |
| "grad_norm": 3.5755501548250357, | |
| "learning_rate": 4.6026372579218686e-07, | |
| "loss": 0.5277, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 0.824381412190706, | |
| "grad_norm": 3.900846576287911, | |
| "learning_rate": 4.5823570736448913e-07, | |
| "loss": 0.4289, | |
| "step": 2049 | |
| }, | |
| { | |
| "epoch": 0.8247837457252062, | |
| "grad_norm": 4.422345897561269, | |
| "learning_rate": 4.5621171585965046e-07, | |
| "loss": 0.5439, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.8251860792597063, | |
| "grad_norm": 3.86188238287213, | |
| "learning_rate": 4.5419175526954074e-07, | |
| "loss": 0.4971, | |
| "step": 2051 | |
| }, | |
| { | |
| "epoch": 0.8255884127942064, | |
| "grad_norm": 4.149840183197456, | |
| "learning_rate": 4.5217582957808064e-07, | |
| "loss": 0.4913, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 0.8259907463287065, | |
| "grad_norm": 4.14442670188834, | |
| "learning_rate": 4.501639427612309e-07, | |
| "loss": 0.593, | |
| "step": 2053 | |
| }, | |
| { | |
| "epoch": 0.8263930798632066, | |
| "grad_norm": 4.160435006991155, | |
| "learning_rate": 4.4815609878698847e-07, | |
| "loss": 0.5702, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 0.8267954133977067, | |
| "grad_norm": 4.699994325668033, | |
| "learning_rate": 4.46152301615376e-07, | |
| "loss": 0.6052, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.8271977469322068, | |
| "grad_norm": 4.158216057441291, | |
| "learning_rate": 4.441525551984349e-07, | |
| "loss": 0.6375, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 0.8276000804667069, | |
| "grad_norm": 5.052807884367178, | |
| "learning_rate": 4.421568634802165e-07, | |
| "loss": 0.687, | |
| "step": 2057 | |
| }, | |
| { | |
| "epoch": 0.828002414001207, | |
| "grad_norm": 3.9353061661541133, | |
| "learning_rate": 4.40165230396776e-07, | |
| "loss": 0.4774, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 0.8284047475357071, | |
| "grad_norm": 4.01193456780117, | |
| "learning_rate": 4.3817765987616474e-07, | |
| "loss": 0.4932, | |
| "step": 2059 | |
| }, | |
| { | |
| "epoch": 0.8288070810702072, | |
| "grad_norm": 4.976729164334829, | |
| "learning_rate": 4.361941558384189e-07, | |
| "loss": 0.6807, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.8292094146047073, | |
| "grad_norm": 4.339682582742787, | |
| "learning_rate": 4.3421472219555725e-07, | |
| "loss": 0.4727, | |
| "step": 2061 | |
| }, | |
| { | |
| "epoch": 0.8296117481392074, | |
| "grad_norm": 4.503522087170257, | |
| "learning_rate": 4.3223936285156836e-07, | |
| "loss": 0.6022, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 0.8300140816737075, | |
| "grad_norm": 4.009166626990359, | |
| "learning_rate": 4.3026808170240686e-07, | |
| "loss": 0.6318, | |
| "step": 2063 | |
| }, | |
| { | |
| "epoch": 0.8304164152082076, | |
| "grad_norm": 4.433472277211533, | |
| "learning_rate": 4.283008826359833e-07, | |
| "loss": 0.6059, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 0.8308187487427077, | |
| "grad_norm": 4.239654566860423, | |
| "learning_rate": 4.263377695321577e-07, | |
| "loss": 0.5319, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.8312210822772078, | |
| "grad_norm": 4.33872236149242, | |
| "learning_rate": 4.2437874626273024e-07, | |
| "loss": 0.5192, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 0.831623415811708, | |
| "grad_norm": 4.396517305178651, | |
| "learning_rate": 4.2242381669143603e-07, | |
| "loss": 0.6266, | |
| "step": 2067 | |
| }, | |
| { | |
| "epoch": 0.832025749346208, | |
| "grad_norm": 4.213477790744039, | |
| "learning_rate": 4.204729846739358e-07, | |
| "loss": 0.6738, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 0.8324280828807081, | |
| "grad_norm": 3.312219624247552, | |
| "learning_rate": 4.1852625405780914e-07, | |
| "loss": 0.4833, | |
| "step": 2069 | |
| }, | |
| { | |
| "epoch": 0.8328304164152082, | |
| "grad_norm": 3.9392383731854257, | |
| "learning_rate": 4.165836286825464e-07, | |
| "loss": 0.4513, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.8332327499497083, | |
| "grad_norm": 3.8224909723307734, | |
| "learning_rate": 4.1464511237954056e-07, | |
| "loss": 0.5628, | |
| "step": 2071 | |
| }, | |
| { | |
| "epoch": 0.8336350834842085, | |
| "grad_norm": 4.5465506777870885, | |
| "learning_rate": 4.1271070897208166e-07, | |
| "loss": 0.5761, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 0.8340374170187085, | |
| "grad_norm": 3.87942801566727, | |
| "learning_rate": 4.107804222753464e-07, | |
| "loss": 0.4364, | |
| "step": 2073 | |
| }, | |
| { | |
| "epoch": 0.8344397505532086, | |
| "grad_norm": 3.901824805945848, | |
| "learning_rate": 4.088542560963937e-07, | |
| "loss": 0.4897, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 0.8348420840877087, | |
| "grad_norm": 4.2369387169530155, | |
| "learning_rate": 4.0693221423415447e-07, | |
| "loss": 0.6186, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.8352444176222088, | |
| "grad_norm": 4.1329987533853005, | |
| "learning_rate": 4.0501430047942603e-07, | |
| "loss": 0.5358, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 0.835646751156709, | |
| "grad_norm": 4.533598883351018, | |
| "learning_rate": 4.03100518614864e-07, | |
| "loss": 0.5522, | |
| "step": 2077 | |
| }, | |
| { | |
| "epoch": 0.836049084691209, | |
| "grad_norm": 3.998009974659786, | |
| "learning_rate": 4.011908724149746e-07, | |
| "loss": 0.3743, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 0.8364514182257091, | |
| "grad_norm": 3.417865078970055, | |
| "learning_rate": 3.9928536564610767e-07, | |
| "loss": 0.4035, | |
| "step": 2079 | |
| }, | |
| { | |
| "epoch": 0.8368537517602093, | |
| "grad_norm": 3.9749987307006345, | |
| "learning_rate": 3.9738400206644766e-07, | |
| "loss": 0.6477, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.8372560852947093, | |
| "grad_norm": 4.037697255647064, | |
| "learning_rate": 3.9548678542600886e-07, | |
| "loss": 0.5661, | |
| "step": 2081 | |
| }, | |
| { | |
| "epoch": 0.8376584188292094, | |
| "grad_norm": 4.431095797135675, | |
| "learning_rate": 3.935937194666267e-07, | |
| "loss": 0.4818, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 0.8380607523637095, | |
| "grad_norm": 4.260045700942229, | |
| "learning_rate": 3.9170480792194985e-07, | |
| "loss": 0.4263, | |
| "step": 2083 | |
| }, | |
| { | |
| "epoch": 0.8384630858982096, | |
| "grad_norm": 4.051159338313962, | |
| "learning_rate": 3.8982005451743243e-07, | |
| "loss": 0.487, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 0.8388654194327098, | |
| "grad_norm": 3.505632578812268, | |
| "learning_rate": 3.8793946297032926e-07, | |
| "loss": 0.5173, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.8392677529672098, | |
| "grad_norm": 3.4621570841739917, | |
| "learning_rate": 3.8606303698968643e-07, | |
| "loss": 0.3609, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 0.8396700865017099, | |
| "grad_norm": 3.708654324006204, | |
| "learning_rate": 3.841907802763331e-07, | |
| "loss": 0.4437, | |
| "step": 2087 | |
| }, | |
| { | |
| "epoch": 0.84007242003621, | |
| "grad_norm": 3.3369961693311447, | |
| "learning_rate": 3.8232269652287713e-07, | |
| "loss": 0.4295, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 0.8404747535707101, | |
| "grad_norm": 3.8475631558462884, | |
| "learning_rate": 3.8045878941369434e-07, | |
| "loss": 0.5628, | |
| "step": 2089 | |
| }, | |
| { | |
| "epoch": 0.8408770871052103, | |
| "grad_norm": 3.827602289260064, | |
| "learning_rate": 3.7859906262492503e-07, | |
| "loss": 0.5158, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.8412794206397103, | |
| "grad_norm": 4.093693428637088, | |
| "learning_rate": 3.7674351982446347e-07, | |
| "loss": 0.4742, | |
| "step": 2091 | |
| }, | |
| { | |
| "epoch": 0.8416817541742104, | |
| "grad_norm": 4.310046930338328, | |
| "learning_rate": 3.7489216467195304e-07, | |
| "loss": 0.6097, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 0.8420840877087106, | |
| "grad_norm": 4.017527345482499, | |
| "learning_rate": 3.730450008187761e-07, | |
| "loss": 0.5075, | |
| "step": 2093 | |
| }, | |
| { | |
| "epoch": 0.8424864212432106, | |
| "grad_norm": 3.946920016020563, | |
| "learning_rate": 3.7120203190805034e-07, | |
| "loss": 0.4275, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 0.8428887547777107, | |
| "grad_norm": 4.145255268119006, | |
| "learning_rate": 3.6936326157461925e-07, | |
| "loss": 0.5914, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.8432910883122108, | |
| "grad_norm": 4.2460232246744525, | |
| "learning_rate": 3.6752869344504586e-07, | |
| "loss": 0.5152, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 0.8436934218467109, | |
| "grad_norm": 3.95033426326261, | |
| "learning_rate": 3.656983311376042e-07, | |
| "loss": 0.4448, | |
| "step": 2097 | |
| }, | |
| { | |
| "epoch": 0.8440957553812111, | |
| "grad_norm": 3.8931938435117286, | |
| "learning_rate": 3.638721782622745e-07, | |
| "loss": 0.499, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 0.8444980889157111, | |
| "grad_norm": 4.3118732957405985, | |
| "learning_rate": 3.620502384207342e-07, | |
| "loss": 0.49, | |
| "step": 2099 | |
| }, | |
| { | |
| "epoch": 0.8449004224502112, | |
| "grad_norm": 4.005112629756995, | |
| "learning_rate": 3.602325152063524e-07, | |
| "loss": 0.4125, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.8453027559847113, | |
| "grad_norm": 3.9812849673226114, | |
| "learning_rate": 3.5841901220418e-07, | |
| "loss": 0.5451, | |
| "step": 2101 | |
| }, | |
| { | |
| "epoch": 0.8457050895192114, | |
| "grad_norm": 3.948178072792975, | |
| "learning_rate": 3.566097329909468e-07, | |
| "loss": 0.6427, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 0.8461074230537116, | |
| "grad_norm": 3.7188722401214016, | |
| "learning_rate": 3.5480468113504946e-07, | |
| "loss": 0.405, | |
| "step": 2103 | |
| }, | |
| { | |
| "epoch": 0.8465097565882116, | |
| "grad_norm": 3.913596983806647, | |
| "learning_rate": 3.530038601965491e-07, | |
| "loss": 0.4635, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 0.8469120901227117, | |
| "grad_norm": 4.411578338116122, | |
| "learning_rate": 3.5120727372716295e-07, | |
| "loss": 0.5511, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.8473144236572119, | |
| "grad_norm": 3.282083239679109, | |
| "learning_rate": 3.494149252702536e-07, | |
| "loss": 0.4441, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 0.8477167571917119, | |
| "grad_norm": 4.011319326831284, | |
| "learning_rate": 3.476268183608281e-07, | |
| "loss": 0.5018, | |
| "step": 2107 | |
| }, | |
| { | |
| "epoch": 0.848119090726212, | |
| "grad_norm": 4.284870919327636, | |
| "learning_rate": 3.4584295652552667e-07, | |
| "loss": 0.4846, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 0.8485214242607121, | |
| "grad_norm": 4.276416896965254, | |
| "learning_rate": 3.4406334328261765e-07, | |
| "loss": 0.5376, | |
| "step": 2109 | |
| }, | |
| { | |
| "epoch": 0.8489237577952122, | |
| "grad_norm": 3.834125175179826, | |
| "learning_rate": 3.4228798214198974e-07, | |
| "loss": 0.5651, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.8493260913297124, | |
| "grad_norm": 3.9930841906813517, | |
| "learning_rate": 3.405168766051445e-07, | |
| "loss": 0.3851, | |
| "step": 2111 | |
| }, | |
| { | |
| "epoch": 0.8497284248642124, | |
| "grad_norm": 4.2112351410317626, | |
| "learning_rate": 3.3875003016519137e-07, | |
| "loss": 0.5431, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 0.8501307583987125, | |
| "grad_norm": 4.52788654812861, | |
| "learning_rate": 3.369874463068398e-07, | |
| "loss": 0.5849, | |
| "step": 2113 | |
| }, | |
| { | |
| "epoch": 0.8505330919332126, | |
| "grad_norm": 3.8509825855331163, | |
| "learning_rate": 3.3522912850639106e-07, | |
| "loss": 0.5748, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 0.8509354254677127, | |
| "grad_norm": 3.510509250668585, | |
| "learning_rate": 3.334750802317338e-07, | |
| "loss": 0.4397, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.8513377590022129, | |
| "grad_norm": 3.8349389092945114, | |
| "learning_rate": 3.3172530494233467e-07, | |
| "loss": 0.4769, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 0.8517400925367129, | |
| "grad_norm": 3.8288751826393197, | |
| "learning_rate": 3.2997980608923426e-07, | |
| "loss": 0.5434, | |
| "step": 2117 | |
| }, | |
| { | |
| "epoch": 0.852142426071213, | |
| "grad_norm": 3.9001403159636374, | |
| "learning_rate": 3.282385871150387e-07, | |
| "loss": 0.4523, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 0.8525447596057132, | |
| "grad_norm": 3.6933669568597978, | |
| "learning_rate": 3.26501651453911e-07, | |
| "loss": 0.3905, | |
| "step": 2119 | |
| }, | |
| { | |
| "epoch": 0.8529470931402132, | |
| "grad_norm": 3.811615026103731, | |
| "learning_rate": 3.247690025315689e-07, | |
| "loss": 0.4305, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.8533494266747134, | |
| "grad_norm": 3.6210800698038925, | |
| "learning_rate": 3.2304064376527417e-07, | |
| "loss": 0.5332, | |
| "step": 2121 | |
| }, | |
| { | |
| "epoch": 0.8537517602092134, | |
| "grad_norm": 4.231885438858511, | |
| "learning_rate": 3.2131657856382753e-07, | |
| "loss": 0.5911, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 0.8541540937437135, | |
| "grad_norm": 4.464935656077254, | |
| "learning_rate": 3.195968103275618e-07, | |
| "loss": 0.5702, | |
| "step": 2123 | |
| }, | |
| { | |
| "epoch": 0.8545564272782137, | |
| "grad_norm": 4.3078684853827465, | |
| "learning_rate": 3.17881342448334e-07, | |
| "loss": 0.5691, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 0.8549587608127137, | |
| "grad_norm": 3.8241264163503255, | |
| "learning_rate": 3.1617017830952084e-07, | |
| "loss": 0.5375, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.8553610943472139, | |
| "grad_norm": 3.519525197941784, | |
| "learning_rate": 3.1446332128601015e-07, | |
| "loss": 0.4416, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 0.8557634278817139, | |
| "grad_norm": 3.4697912989841524, | |
| "learning_rate": 3.12760774744196e-07, | |
| "loss": 0.4459, | |
| "step": 2127 | |
| }, | |
| { | |
| "epoch": 0.856165761416214, | |
| "grad_norm": 3.7851796176468455, | |
| "learning_rate": 3.1106254204196883e-07, | |
| "loss": 0.4682, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 0.8565680949507142, | |
| "grad_norm": 3.936606771811409, | |
| "learning_rate": 3.093686265287138e-07, | |
| "loss": 0.5729, | |
| "step": 2129 | |
| }, | |
| { | |
| "epoch": 0.8569704284852142, | |
| "grad_norm": 4.190455136191216, | |
| "learning_rate": 3.076790315452988e-07, | |
| "loss": 0.5277, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.8573727620197144, | |
| "grad_norm": 4.108892268475147, | |
| "learning_rate": 3.05993760424072e-07, | |
| "loss": 0.4329, | |
| "step": 2131 | |
| }, | |
| { | |
| "epoch": 0.8577750955542145, | |
| "grad_norm": 4.390564938079403, | |
| "learning_rate": 3.043128164888537e-07, | |
| "loss": 0.5011, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 0.8581774290887145, | |
| "grad_norm": 4.367475352759402, | |
| "learning_rate": 3.0263620305492877e-07, | |
| "loss": 0.4797, | |
| "step": 2133 | |
| }, | |
| { | |
| "epoch": 0.8585797626232147, | |
| "grad_norm": 3.8444843612579924, | |
| "learning_rate": 3.009639234290418e-07, | |
| "loss": 0.5327, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 0.8589820961577147, | |
| "grad_norm": 3.6090606638950318, | |
| "learning_rate": 2.992959809093901e-07, | |
| "loss": 0.4413, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.8593844296922148, | |
| "grad_norm": 4.388230282282369, | |
| "learning_rate": 2.976323787856175e-07, | |
| "loss": 0.5717, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 0.859786763226715, | |
| "grad_norm": 4.333473286328121, | |
| "learning_rate": 2.9597312033880557e-07, | |
| "loss": 0.6314, | |
| "step": 2137 | |
| }, | |
| { | |
| "epoch": 0.860189096761215, | |
| "grad_norm": 3.890510795148687, | |
| "learning_rate": 2.943182088414701e-07, | |
| "loss": 0.433, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 0.8605914302957152, | |
| "grad_norm": 4.234653691147445, | |
| "learning_rate": 2.9266764755755406e-07, | |
| "loss": 0.54, | |
| "step": 2139 | |
| }, | |
| { | |
| "epoch": 0.8609937638302152, | |
| "grad_norm": 4.183758834087082, | |
| "learning_rate": 2.910214397424202e-07, | |
| "loss": 0.4845, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.8613960973647153, | |
| "grad_norm": 3.799668423286151, | |
| "learning_rate": 2.8937958864284353e-07, | |
| "loss": 0.4333, | |
| "step": 2141 | |
| }, | |
| { | |
| "epoch": 0.8617984308992155, | |
| "grad_norm": 4.133997388279442, | |
| "learning_rate": 2.877420974970088e-07, | |
| "loss": 0.4396, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 0.8622007644337155, | |
| "grad_norm": 4.0525547445688614, | |
| "learning_rate": 2.861089695345007e-07, | |
| "loss": 0.4857, | |
| "step": 2143 | |
| }, | |
| { | |
| "epoch": 0.8626030979682157, | |
| "grad_norm": 3.7880628956269904, | |
| "learning_rate": 2.8448020797629735e-07, | |
| "loss": 0.5046, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 0.8630054315027158, | |
| "grad_norm": 4.143267859487349, | |
| "learning_rate": 2.8285581603476747e-07, | |
| "loss": 0.5291, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.8634077650372158, | |
| "grad_norm": 3.8047765913147993, | |
| "learning_rate": 2.8123579691365923e-07, | |
| "loss": 0.5513, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 0.863810098571716, | |
| "grad_norm": 4.041711864076377, | |
| "learning_rate": 2.796201538080981e-07, | |
| "loss": 0.448, | |
| "step": 2147 | |
| }, | |
| { | |
| "epoch": 0.864212432106216, | |
| "grad_norm": 4.193683351365294, | |
| "learning_rate": 2.7800888990457855e-07, | |
| "loss": 0.5598, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 0.8646147656407162, | |
| "grad_norm": 3.6613935569874583, | |
| "learning_rate": 2.7640200838095793e-07, | |
| "loss": 0.4067, | |
| "step": 2149 | |
| }, | |
| { | |
| "epoch": 0.8650170991752163, | |
| "grad_norm": 4.104620267298875, | |
| "learning_rate": 2.7479951240644953e-07, | |
| "loss": 0.5648, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.8654194327097163, | |
| "grad_norm": 3.7600215255952256, | |
| "learning_rate": 2.7320140514161846e-07, | |
| "loss": 0.3946, | |
| "step": 2151 | |
| }, | |
| { | |
| "epoch": 0.8658217662442165, | |
| "grad_norm": 3.9525358112645965, | |
| "learning_rate": 2.7160768973837293e-07, | |
| "loss": 0.53, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 0.8662240997787165, | |
| "grad_norm": 3.5450533427675226, | |
| "learning_rate": 2.7001836933995995e-07, | |
| "loss": 0.5298, | |
| "step": 2153 | |
| }, | |
| { | |
| "epoch": 0.8666264333132166, | |
| "grad_norm": 3.6668666878628815, | |
| "learning_rate": 2.684334470809588e-07, | |
| "loss": 0.5056, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 0.8670287668477168, | |
| "grad_norm": 4.224237505147509, | |
| "learning_rate": 2.6685292608727236e-07, | |
| "loss": 0.5218, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.8674311003822168, | |
| "grad_norm": 3.69557439406065, | |
| "learning_rate": 2.652768094761257e-07, | |
| "loss": 0.3756, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 0.867833433916717, | |
| "grad_norm": 4.281090167368473, | |
| "learning_rate": 2.637051003560548e-07, | |
| "loss": 0.6402, | |
| "step": 2157 | |
| }, | |
| { | |
| "epoch": 0.8682357674512171, | |
| "grad_norm": 3.9633212522861054, | |
| "learning_rate": 2.621378018269047e-07, | |
| "loss": 0.5075, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 0.8686381009857171, | |
| "grad_norm": 3.8583945548771825, | |
| "learning_rate": 2.6057491697982e-07, | |
| "loss": 0.6033, | |
| "step": 2159 | |
| }, | |
| { | |
| "epoch": 0.8690404345202173, | |
| "grad_norm": 3.3558856738663487, | |
| "learning_rate": 2.5901644889724186e-07, | |
| "loss": 0.4015, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.8694427680547173, | |
| "grad_norm": 3.5749358804434483, | |
| "learning_rate": 2.574624006528992e-07, | |
| "loss": 0.4944, | |
| "step": 2161 | |
| }, | |
| { | |
| "epoch": 0.8698451015892175, | |
| "grad_norm": 3.86587826976141, | |
| "learning_rate": 2.55912775311804e-07, | |
| "loss": 0.4891, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 0.8702474351237176, | |
| "grad_norm": 3.9917581972004785, | |
| "learning_rate": 2.543675759302464e-07, | |
| "loss": 0.5079, | |
| "step": 2163 | |
| }, | |
| { | |
| "epoch": 0.8706497686582176, | |
| "grad_norm": 4.393976999836958, | |
| "learning_rate": 2.5282680555578477e-07, | |
| "loss": 0.6145, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 0.8710521021927178, | |
| "grad_norm": 4.04228191167184, | |
| "learning_rate": 2.51290467227244e-07, | |
| "loss": 0.5429, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.8714544357272178, | |
| "grad_norm": 4.4282484060475324, | |
| "learning_rate": 2.497585639747077e-07, | |
| "loss": 0.6325, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 0.871856769261718, | |
| "grad_norm": 3.816275160612967, | |
| "learning_rate": 2.482310988195125e-07, | |
| "loss": 0.5543, | |
| "step": 2167 | |
| }, | |
| { | |
| "epoch": 0.8722591027962181, | |
| "grad_norm": 4.1145602142866675, | |
| "learning_rate": 2.4670807477424007e-07, | |
| "loss": 0.5527, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 0.8726614363307181, | |
| "grad_norm": 4.231321602541556, | |
| "learning_rate": 2.4518949484271525e-07, | |
| "loss": 0.6496, | |
| "step": 2169 | |
| }, | |
| { | |
| "epoch": 0.8730637698652183, | |
| "grad_norm": 4.220523454555819, | |
| "learning_rate": 2.4367536201999695e-07, | |
| "loss": 0.5652, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.8734661033997184, | |
| "grad_norm": 3.938889256449136, | |
| "learning_rate": 2.421656792923724e-07, | |
| "loss": 0.456, | |
| "step": 2171 | |
| }, | |
| { | |
| "epoch": 0.8738684369342185, | |
| "grad_norm": 4.5217908297671485, | |
| "learning_rate": 2.406604496373535e-07, | |
| "loss": 0.6334, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 0.8742707704687186, | |
| "grad_norm": 3.7526929266944773, | |
| "learning_rate": 2.391596760236681e-07, | |
| "loss": 0.4743, | |
| "step": 2173 | |
| }, | |
| { | |
| "epoch": 0.8746731040032186, | |
| "grad_norm": 3.710705344410006, | |
| "learning_rate": 2.376633614112561e-07, | |
| "loss": 0.5661, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 0.8750754375377188, | |
| "grad_norm": 3.2609967694053243, | |
| "learning_rate": 2.361715087512631e-07, | |
| "loss": 0.3589, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.8754777710722189, | |
| "grad_norm": 4.108101136746485, | |
| "learning_rate": 2.346841209860351e-07, | |
| "loss": 0.4368, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 0.875880104606719, | |
| "grad_norm": 3.763267262463342, | |
| "learning_rate": 2.3320120104911053e-07, | |
| "loss": 0.4953, | |
| "step": 2177 | |
| }, | |
| { | |
| "epoch": 0.8762824381412191, | |
| "grad_norm": 5.025294007587999, | |
| "learning_rate": 2.317227518652168e-07, | |
| "loss": 0.6524, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 0.8766847716757191, | |
| "grad_norm": 3.9367012271057584, | |
| "learning_rate": 2.302487763502645e-07, | |
| "loss": 0.4149, | |
| "step": 2179 | |
| }, | |
| { | |
| "epoch": 0.8770871052102193, | |
| "grad_norm": 4.236893564539156, | |
| "learning_rate": 2.2877927741134036e-07, | |
| "loss": 0.5532, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.8774894387447194, | |
| "grad_norm": 3.5424218920413373, | |
| "learning_rate": 2.2731425794670135e-07, | |
| "loss": 0.4691, | |
| "step": 2181 | |
| }, | |
| { | |
| "epoch": 0.8778917722792194, | |
| "grad_norm": 4.04407474745495, | |
| "learning_rate": 2.2585372084577033e-07, | |
| "loss": 0.5125, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 0.8782941058137196, | |
| "grad_norm": 3.8612028242647147, | |
| "learning_rate": 2.2439766898913e-07, | |
| "loss": 0.5256, | |
| "step": 2183 | |
| }, | |
| { | |
| "epoch": 0.8786964393482197, | |
| "grad_norm": 4.271299959657279, | |
| "learning_rate": 2.2294610524851685e-07, | |
| "loss": 0.4944, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 0.8790987728827198, | |
| "grad_norm": 4.465135814281888, | |
| "learning_rate": 2.2149903248681464e-07, | |
| "loss": 0.6834, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 0.8795011064172199, | |
| "grad_norm": 3.9117028793633173, | |
| "learning_rate": 2.2005645355805127e-07, | |
| "loss": 0.4605, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 0.8799034399517199, | |
| "grad_norm": 3.8402220541360292, | |
| "learning_rate": 2.1861837130738945e-07, | |
| "loss": 0.4467, | |
| "step": 2187 | |
| }, | |
| { | |
| "epoch": 0.8803057734862201, | |
| "grad_norm": 4.223182944719613, | |
| "learning_rate": 2.1718478857112506e-07, | |
| "loss": 0.5347, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 0.8807081070207202, | |
| "grad_norm": 3.8811613151194213, | |
| "learning_rate": 2.1575570817667952e-07, | |
| "loss": 0.5265, | |
| "step": 2189 | |
| }, | |
| { | |
| "epoch": 0.8811104405552203, | |
| "grad_norm": 3.8705546986459956, | |
| "learning_rate": 2.143311329425926e-07, | |
| "loss": 0.4427, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.8815127740897204, | |
| "grad_norm": 4.5414937373063955, | |
| "learning_rate": 2.1291106567852105e-07, | |
| "loss": 0.6442, | |
| "step": 2191 | |
| }, | |
| { | |
| "epoch": 0.8819151076242204, | |
| "grad_norm": 3.588299387450043, | |
| "learning_rate": 2.1149550918522887e-07, | |
| "loss": 0.4173, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 0.8823174411587206, | |
| "grad_norm": 4.165225372551161, | |
| "learning_rate": 2.100844662545848e-07, | |
| "loss": 0.5496, | |
| "step": 2193 | |
| }, | |
| { | |
| "epoch": 0.8827197746932207, | |
| "grad_norm": 4.217297273907552, | |
| "learning_rate": 2.086779396695554e-07, | |
| "loss": 0.5329, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 0.8831221082277207, | |
| "grad_norm": 3.980212414160413, | |
| "learning_rate": 2.0727593220419812e-07, | |
| "loss": 0.4682, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 0.8835244417622209, | |
| "grad_norm": 4.015069610064507, | |
| "learning_rate": 2.0587844662365963e-07, | |
| "loss": 0.4975, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 0.883926775296721, | |
| "grad_norm": 3.675033120744641, | |
| "learning_rate": 2.0448548568416715e-07, | |
| "loss": 0.4932, | |
| "step": 2197 | |
| }, | |
| { | |
| "epoch": 0.8843291088312211, | |
| "grad_norm": 3.7917456823069178, | |
| "learning_rate": 2.030970521330247e-07, | |
| "loss": 0.4523, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 0.8847314423657212, | |
| "grad_norm": 4.2084887531102195, | |
| "learning_rate": 2.0171314870860548e-07, | |
| "loss": 0.5797, | |
| "step": 2199 | |
| }, | |
| { | |
| "epoch": 0.8851337759002212, | |
| "grad_norm": 4.1920678247066245, | |
| "learning_rate": 2.0033377814034999e-07, | |
| "loss": 0.5458, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.8855361094347214, | |
| "grad_norm": 4.341033030777012, | |
| "learning_rate": 1.9895894314875707e-07, | |
| "loss": 0.5069, | |
| "step": 2201 | |
| }, | |
| { | |
| "epoch": 0.8859384429692215, | |
| "grad_norm": 4.251489033634488, | |
| "learning_rate": 1.9758864644538188e-07, | |
| "loss": 0.5658, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 0.8863407765037216, | |
| "grad_norm": 4.118913583140747, | |
| "learning_rate": 1.9622289073282675e-07, | |
| "loss": 0.5186, | |
| "step": 2203 | |
| }, | |
| { | |
| "epoch": 0.8867431100382217, | |
| "grad_norm": 4.17198274381597, | |
| "learning_rate": 1.9486167870473915e-07, | |
| "loss": 0.5394, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 0.8871454435727217, | |
| "grad_norm": 4.1140466186765865, | |
| "learning_rate": 1.9350501304580577e-07, | |
| "loss": 0.6302, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.8875477771072219, | |
| "grad_norm": 4.252952260852059, | |
| "learning_rate": 1.9215289643174485e-07, | |
| "loss": 0.5715, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 0.887950110641722, | |
| "grad_norm": 3.9053915680472673, | |
| "learning_rate": 1.9080533152930486e-07, | |
| "loss": 0.502, | |
| "step": 2207 | |
| }, | |
| { | |
| "epoch": 0.8883524441762221, | |
| "grad_norm": 3.702225135993101, | |
| "learning_rate": 1.8946232099625482e-07, | |
| "loss": 0.3917, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 0.8887547777107222, | |
| "grad_norm": 3.5875962058050863, | |
| "learning_rate": 1.8812386748138234e-07, | |
| "loss": 0.3413, | |
| "step": 2209 | |
| }, | |
| { | |
| "epoch": 0.8891571112452223, | |
| "grad_norm": 3.6894845944247723, | |
| "learning_rate": 1.8678997362448743e-07, | |
| "loss": 0.4334, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.8895594447797224, | |
| "grad_norm": 3.977873097022255, | |
| "learning_rate": 1.8546064205637758e-07, | |
| "loss": 0.4746, | |
| "step": 2211 | |
| }, | |
| { | |
| "epoch": 0.8899617783142225, | |
| "grad_norm": 3.4865669801340147, | |
| "learning_rate": 1.8413587539886064e-07, | |
| "loss": 0.4173, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 0.8903641118487226, | |
| "grad_norm": 4.35862373407535, | |
| "learning_rate": 1.828156762647429e-07, | |
| "loss": 0.5653, | |
| "step": 2213 | |
| }, | |
| { | |
| "epoch": 0.8907664453832227, | |
| "grad_norm": 4.2652495550914775, | |
| "learning_rate": 1.81500047257821e-07, | |
| "loss": 0.5071, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 0.8911687789177228, | |
| "grad_norm": 3.285190042692873, | |
| "learning_rate": 1.8018899097287906e-07, | |
| "loss": 0.3731, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 0.8915711124522229, | |
| "grad_norm": 3.782205241606253, | |
| "learning_rate": 1.78882509995682e-07, | |
| "loss": 0.4907, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 0.891973445986723, | |
| "grad_norm": 4.391188417961942, | |
| "learning_rate": 1.7758060690297035e-07, | |
| "loss": 0.3594, | |
| "step": 2217 | |
| }, | |
| { | |
| "epoch": 0.892375779521223, | |
| "grad_norm": 3.172977998916911, | |
| "learning_rate": 1.7628328426245677e-07, | |
| "loss": 0.3866, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 0.8927781130557232, | |
| "grad_norm": 4.109857114119739, | |
| "learning_rate": 1.7499054463281985e-07, | |
| "loss": 0.5489, | |
| "step": 2219 | |
| }, | |
| { | |
| "epoch": 0.8931804465902233, | |
| "grad_norm": 3.6387926247562214, | |
| "learning_rate": 1.7370239056369946e-07, | |
| "loss": 0.3892, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.8935827801247234, | |
| "grad_norm": 3.897940749417438, | |
| "learning_rate": 1.7241882459569008e-07, | |
| "loss": 0.6287, | |
| "step": 2221 | |
| }, | |
| { | |
| "epoch": 0.8939851136592235, | |
| "grad_norm": 3.968476012728493, | |
| "learning_rate": 1.7113984926033832e-07, | |
| "loss": 0.459, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 0.8943874471937237, | |
| "grad_norm": 4.209938898766111, | |
| "learning_rate": 1.6986546708013722e-07, | |
| "loss": 0.5426, | |
| "step": 2223 | |
| }, | |
| { | |
| "epoch": 0.8947897807282237, | |
| "grad_norm": 3.791580194250541, | |
| "learning_rate": 1.6859568056852016e-07, | |
| "loss": 0.498, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 0.8951921142627238, | |
| "grad_norm": 3.8639160637014665, | |
| "learning_rate": 1.673304922298563e-07, | |
| "loss": 0.4049, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.8955944477972239, | |
| "grad_norm": 4.147228062957371, | |
| "learning_rate": 1.6606990455944634e-07, | |
| "loss": 0.5258, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 0.895996781331724, | |
| "grad_norm": 4.247308433616599, | |
| "learning_rate": 1.648139200435178e-07, | |
| "loss": 0.5252, | |
| "step": 2227 | |
| }, | |
| { | |
| "epoch": 0.8963991148662241, | |
| "grad_norm": 4.057558052673904, | |
| "learning_rate": 1.635625411592179e-07, | |
| "loss": 0.4851, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 0.8968014484007242, | |
| "grad_norm": 4.529371570037193, | |
| "learning_rate": 1.6231577037461228e-07, | |
| "loss": 0.6091, | |
| "step": 2229 | |
| }, | |
| { | |
| "epoch": 0.8972037819352243, | |
| "grad_norm": 4.092889891063284, | |
| "learning_rate": 1.6107361014867622e-07, | |
| "loss": 0.5051, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.8976061154697244, | |
| "grad_norm": 4.057510211081129, | |
| "learning_rate": 1.598360629312931e-07, | |
| "loss": 0.5379, | |
| "step": 2231 | |
| }, | |
| { | |
| "epoch": 0.8980084490042245, | |
| "grad_norm": 4.08275289824886, | |
| "learning_rate": 1.586031311632477e-07, | |
| "loss": 0.5542, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 0.8984107825387246, | |
| "grad_norm": 5.7116466876850875, | |
| "learning_rate": 1.5737481727622212e-07, | |
| "loss": 0.5021, | |
| "step": 2233 | |
| }, | |
| { | |
| "epoch": 0.8988131160732247, | |
| "grad_norm": 3.507507523132951, | |
| "learning_rate": 1.5615112369278957e-07, | |
| "loss": 0.391, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 0.8992154496077248, | |
| "grad_norm": 3.7113726990024194, | |
| "learning_rate": 1.5493205282641228e-07, | |
| "loss": 0.4515, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.899617783142225, | |
| "grad_norm": 3.512898207115907, | |
| "learning_rate": 1.5371760708143447e-07, | |
| "loss": 0.4935, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 0.900020116676725, | |
| "grad_norm": 4.213491522925021, | |
| "learning_rate": 1.525077888530782e-07, | |
| "loss": 0.538, | |
| "step": 2237 | |
| }, | |
| { | |
| "epoch": 0.9004224502112251, | |
| "grad_norm": 4.482285849278304, | |
| "learning_rate": 1.5130260052743955e-07, | |
| "loss": 0.6545, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 0.9008247837457252, | |
| "grad_norm": 3.7492718480617384, | |
| "learning_rate": 1.5010204448148214e-07, | |
| "loss": 0.4442, | |
| "step": 2239 | |
| }, | |
| { | |
| "epoch": 0.9012271172802253, | |
| "grad_norm": 3.9838020107249195, | |
| "learning_rate": 1.489061230830338e-07, | |
| "loss": 0.4953, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.9016294508147255, | |
| "grad_norm": 3.9003308620272796, | |
| "learning_rate": 1.4771483869078228e-07, | |
| "loss": 0.5208, | |
| "step": 2241 | |
| }, | |
| { | |
| "epoch": 0.9020317843492255, | |
| "grad_norm": 3.788631062888711, | |
| "learning_rate": 1.4652819365426867e-07, | |
| "loss": 0.5613, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 0.9024341178837256, | |
| "grad_norm": 4.676276663949487, | |
| "learning_rate": 1.453461903138853e-07, | |
| "loss": 0.5352, | |
| "step": 2243 | |
| }, | |
| { | |
| "epoch": 0.9028364514182257, | |
| "grad_norm": 4.525346271987768, | |
| "learning_rate": 1.441688310008682e-07, | |
| "loss": 0.5099, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 0.9032387849527258, | |
| "grad_norm": 3.625199458865597, | |
| "learning_rate": 1.4299611803729547e-07, | |
| "loss": 0.4766, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 0.903641118487226, | |
| "grad_norm": 3.8753089109297787, | |
| "learning_rate": 1.4182805373608055e-07, | |
| "loss": 0.5326, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 0.904043452021726, | |
| "grad_norm": 4.021228834417677, | |
| "learning_rate": 1.4066464040096956e-07, | |
| "loss": 0.5691, | |
| "step": 2247 | |
| }, | |
| { | |
| "epoch": 0.9044457855562261, | |
| "grad_norm": 4.6752863808538105, | |
| "learning_rate": 1.395058803265334e-07, | |
| "loss": 0.7598, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 0.9048481190907263, | |
| "grad_norm": 3.7179471187703514, | |
| "learning_rate": 1.3835177579816783e-07, | |
| "loss": 0.3955, | |
| "step": 2249 | |
| }, | |
| { | |
| "epoch": 0.9052504526252263, | |
| "grad_norm": 5.00701237752423, | |
| "learning_rate": 1.3720232909208541e-07, | |
| "loss": 0.6319, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.9056527861597264, | |
| "grad_norm": 3.7834417036951895, | |
| "learning_rate": 1.3605754247531245e-07, | |
| "loss": 0.5174, | |
| "step": 2251 | |
| }, | |
| { | |
| "epoch": 0.9060551196942265, | |
| "grad_norm": 3.680557056193272, | |
| "learning_rate": 1.3491741820568366e-07, | |
| "loss": 0.4133, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 0.9064574532287266, | |
| "grad_norm": 4.046295696144512, | |
| "learning_rate": 1.337819585318395e-07, | |
| "loss": 0.6443, | |
| "step": 2253 | |
| }, | |
| { | |
| "epoch": 0.9068597867632268, | |
| "grad_norm": 4.363328152667618, | |
| "learning_rate": 1.3265116569322e-07, | |
| "loss": 0.5437, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 0.9072621202977268, | |
| "grad_norm": 4.437499912355556, | |
| "learning_rate": 1.3152504192006005e-07, | |
| "loss": 0.5439, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 0.9076644538322269, | |
| "grad_norm": 3.904533797833089, | |
| "learning_rate": 1.3040358943338742e-07, | |
| "loss": 0.4058, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 0.908066787366727, | |
| "grad_norm": 3.990363526582056, | |
| "learning_rate": 1.2928681044501513e-07, | |
| "loss": 0.4204, | |
| "step": 2257 | |
| }, | |
| { | |
| "epoch": 0.9084691209012271, | |
| "grad_norm": 4.0760801231394606, | |
| "learning_rate": 1.281747071575404e-07, | |
| "loss": 0.6104, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 0.9088714544357273, | |
| "grad_norm": 4.256580027345346, | |
| "learning_rate": 1.270672817643376e-07, | |
| "loss": 0.6544, | |
| "step": 2259 | |
| }, | |
| { | |
| "epoch": 0.9092737879702273, | |
| "grad_norm": 4.71579332313011, | |
| "learning_rate": 1.2596453644955597e-07, | |
| "loss": 0.6273, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.9096761215047274, | |
| "grad_norm": 3.483958223270458, | |
| "learning_rate": 1.2486647338811248e-07, | |
| "loss": 0.4546, | |
| "step": 2261 | |
| }, | |
| { | |
| "epoch": 0.9100784550392276, | |
| "grad_norm": 3.9093619147070946, | |
| "learning_rate": 1.2377309474569165e-07, | |
| "loss": 0.4985, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 0.9104807885737276, | |
| "grad_norm": 4.057320495855987, | |
| "learning_rate": 1.226844026787377e-07, | |
| "loss": 0.5448, | |
| "step": 2263 | |
| }, | |
| { | |
| "epoch": 0.9108831221082277, | |
| "grad_norm": 3.5360903799800503, | |
| "learning_rate": 1.2160039933445222e-07, | |
| "loss": 0.4164, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 0.9112854556427278, | |
| "grad_norm": 4.087420290794404, | |
| "learning_rate": 1.2052108685078834e-07, | |
| "loss": 0.594, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.9116877891772279, | |
| "grad_norm": 3.9832710853164763, | |
| "learning_rate": 1.1944646735644882e-07, | |
| "loss": 0.452, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 0.9120901227117281, | |
| "grad_norm": 4.282566514995135, | |
| "learning_rate": 1.1837654297087997e-07, | |
| "loss": 0.5473, | |
| "step": 2267 | |
| }, | |
| { | |
| "epoch": 0.9124924562462281, | |
| "grad_norm": 3.9435273915235656, | |
| "learning_rate": 1.1731131580426796e-07, | |
| "loss": 0.6224, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 0.9128947897807282, | |
| "grad_norm": 4.3103292605724155, | |
| "learning_rate": 1.1625078795753476e-07, | |
| "loss": 0.5467, | |
| "step": 2269 | |
| }, | |
| { | |
| "epoch": 0.9132971233152283, | |
| "grad_norm": 4.22573396631058, | |
| "learning_rate": 1.1519496152233439e-07, | |
| "loss": 0.5037, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.9136994568497284, | |
| "grad_norm": 3.7886525775152338, | |
| "learning_rate": 1.1414383858104756e-07, | |
| "loss": 0.4498, | |
| "step": 2271 | |
| }, | |
| { | |
| "epoch": 0.9141017903842286, | |
| "grad_norm": 3.2224197154073617, | |
| "learning_rate": 1.1309742120677897e-07, | |
| "loss": 0.4794, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 0.9145041239187286, | |
| "grad_norm": 4.705013349470272, | |
| "learning_rate": 1.1205571146335303e-07, | |
| "loss": 0.6041, | |
| "step": 2273 | |
| }, | |
| { | |
| "epoch": 0.9149064574532287, | |
| "grad_norm": 4.6403427413148535, | |
| "learning_rate": 1.1101871140530851e-07, | |
| "loss": 0.6308, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 0.9153087909877289, | |
| "grad_norm": 3.819381857552772, | |
| "learning_rate": 1.0998642307789576e-07, | |
| "loss": 0.5461, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.9157111245222289, | |
| "grad_norm": 3.997200472540085, | |
| "learning_rate": 1.0895884851707255e-07, | |
| "loss": 0.4926, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 0.9161134580567291, | |
| "grad_norm": 4.162425768576248, | |
| "learning_rate": 1.079359897494997e-07, | |
| "loss": 0.489, | |
| "step": 2277 | |
| }, | |
| { | |
| "epoch": 0.9165157915912291, | |
| "grad_norm": 3.8422971328140454, | |
| "learning_rate": 1.0691784879253736e-07, | |
| "loss": 0.5291, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 0.9169181251257292, | |
| "grad_norm": 4.112928250644688, | |
| "learning_rate": 1.0590442765423953e-07, | |
| "loss": 0.4926, | |
| "step": 2279 | |
| }, | |
| { | |
| "epoch": 0.9173204586602294, | |
| "grad_norm": 3.9378155981778664, | |
| "learning_rate": 1.0489572833335349e-07, | |
| "loss": 0.4722, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.9177227921947294, | |
| "grad_norm": 4.631927118939465, | |
| "learning_rate": 1.038917528193123e-07, | |
| "loss": 0.5274, | |
| "step": 2281 | |
| }, | |
| { | |
| "epoch": 0.9181251257292296, | |
| "grad_norm": 4.035377760065258, | |
| "learning_rate": 1.0289250309223314e-07, | |
| "loss": 0.5788, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 0.9185274592637296, | |
| "grad_norm": 3.868825647605808, | |
| "learning_rate": 1.0189798112291177e-07, | |
| "loss": 0.5046, | |
| "step": 2283 | |
| }, | |
| { | |
| "epoch": 0.9189297927982297, | |
| "grad_norm": 4.194215283322997, | |
| "learning_rate": 1.0090818887282112e-07, | |
| "loss": 0.6029, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 0.9193321263327299, | |
| "grad_norm": 4.215786203028234, | |
| "learning_rate": 9.99231282941035e-08, | |
| "loss": 0.4987, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 0.9197344598672299, | |
| "grad_norm": 4.911243421308879, | |
| "learning_rate": 9.894280132957124e-08, | |
| "loss": 0.6475, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 0.92013679340173, | |
| "grad_norm": 4.551350447477857, | |
| "learning_rate": 9.79672099126991e-08, | |
| "loss": 0.6439, | |
| "step": 2287 | |
| }, | |
| { | |
| "epoch": 0.9205391269362302, | |
| "grad_norm": 4.0194710398308615, | |
| "learning_rate": 9.69963559676232e-08, | |
| "loss": 0.5376, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 0.9209414604707302, | |
| "grad_norm": 3.83021269408099, | |
| "learning_rate": 9.60302414091352e-08, | |
| "loss": 0.5093, | |
| "step": 2289 | |
| }, | |
| { | |
| "epoch": 0.9213437940052304, | |
| "grad_norm": 4.1940419822947534, | |
| "learning_rate": 9.506886814268007e-08, | |
| "loss": 0.5702, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.9217461275397304, | |
| "grad_norm": 4.341103613511488, | |
| "learning_rate": 9.41122380643511e-08, | |
| "loss": 0.5271, | |
| "step": 2291 | |
| }, | |
| { | |
| "epoch": 0.9221484610742305, | |
| "grad_norm": 4.421018842084841, | |
| "learning_rate": 9.316035306088684e-08, | |
| "loss": 0.5923, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 0.9225507946087307, | |
| "grad_norm": 4.192150302264493, | |
| "learning_rate": 9.221321500966746e-08, | |
| "loss": 0.5475, | |
| "step": 2293 | |
| }, | |
| { | |
| "epoch": 0.9229531281432307, | |
| "grad_norm": 4.56023548088152, | |
| "learning_rate": 9.127082577871038e-08, | |
| "loss": 0.6055, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 0.9233554616777309, | |
| "grad_norm": 3.727375694339219, | |
| "learning_rate": 9.033318722666801e-08, | |
| "loss": 0.3706, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.9237577952122309, | |
| "grad_norm": 3.7651015398148897, | |
| "learning_rate": 8.940030120282162e-08, | |
| "loss": 0.5118, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 0.924160128746731, | |
| "grad_norm": 4.305229814421829, | |
| "learning_rate": 8.847216954708055e-08, | |
| "loss": 0.5453, | |
| "step": 2297 | |
| }, | |
| { | |
| "epoch": 0.9245624622812312, | |
| "grad_norm": 4.1747564223004066, | |
| "learning_rate": 8.754879408997636e-08, | |
| "loss": 0.5156, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 0.9249647958157312, | |
| "grad_norm": 3.649399704340268, | |
| "learning_rate": 8.663017665266032e-08, | |
| "loss": 0.3701, | |
| "step": 2299 | |
| }, | |
| { | |
| "epoch": 0.9253671293502314, | |
| "grad_norm": 3.7596577501273734, | |
| "learning_rate": 8.571631904690014e-08, | |
| "loss": 0.4399, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.9257694628847315, | |
| "grad_norm": 4.230082435346815, | |
| "learning_rate": 8.480722307507461e-08, | |
| "loss": 0.4874, | |
| "step": 2301 | |
| }, | |
| { | |
| "epoch": 0.9261717964192315, | |
| "grad_norm": 3.9612627542935197, | |
| "learning_rate": 8.390289053017226e-08, | |
| "loss": 0.5485, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 0.9265741299537317, | |
| "grad_norm": 3.5438909670467003, | |
| "learning_rate": 8.300332319578664e-08, | |
| "loss": 0.3951, | |
| "step": 2303 | |
| }, | |
| { | |
| "epoch": 0.9269764634882317, | |
| "grad_norm": 3.2833308636054217, | |
| "learning_rate": 8.210852284611299e-08, | |
| "loss": 0.407, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 0.9273787970227318, | |
| "grad_norm": 3.4444311785022865, | |
| "learning_rate": 8.121849124594405e-08, | |
| "loss": 0.3925, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 0.927781130557232, | |
| "grad_norm": 4.548198093339736, | |
| "learning_rate": 8.033323015066818e-08, | |
| "loss": 0.719, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 0.928183464091732, | |
| "grad_norm": 3.6123982321182284, | |
| "learning_rate": 7.945274130626484e-08, | |
| "loss": 0.3828, | |
| "step": 2307 | |
| }, | |
| { | |
| "epoch": 0.9285857976262322, | |
| "grad_norm": 3.177723392355348, | |
| "learning_rate": 7.857702644930133e-08, | |
| "loss": 0.353, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 0.9289881311607322, | |
| "grad_norm": 4.133100828896777, | |
| "learning_rate": 7.770608730692852e-08, | |
| "loss": 0.4977, | |
| "step": 2309 | |
| }, | |
| { | |
| "epoch": 0.9293904646952323, | |
| "grad_norm": 3.562342202197354, | |
| "learning_rate": 7.683992559687881e-08, | |
| "loss": 0.3892, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.9297927982297325, | |
| "grad_norm": 3.898366854418511, | |
| "learning_rate": 7.597854302746288e-08, | |
| "loss": 0.4583, | |
| "step": 2311 | |
| }, | |
| { | |
| "epoch": 0.9301951317642325, | |
| "grad_norm": 4.612414420760638, | |
| "learning_rate": 7.512194129756428e-08, | |
| "loss": 0.5581, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 0.9305974652987327, | |
| "grad_norm": 3.988029769560431, | |
| "learning_rate": 7.42701220966388e-08, | |
| "loss": 0.4744, | |
| "step": 2313 | |
| }, | |
| { | |
| "epoch": 0.9309997988332327, | |
| "grad_norm": 4.481476524243926, | |
| "learning_rate": 7.342308710470814e-08, | |
| "loss": 0.59, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 0.9314021323677328, | |
| "grad_norm": 4.562506876878105, | |
| "learning_rate": 7.258083799235982e-08, | |
| "loss": 0.6742, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 0.931804465902233, | |
| "grad_norm": 4.628598132831451, | |
| "learning_rate": 7.174337642074147e-08, | |
| "loss": 0.6008, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 0.932206799436733, | |
| "grad_norm": 4.283379928862786, | |
| "learning_rate": 7.091070404155854e-08, | |
| "loss": 0.5053, | |
| "step": 2317 | |
| }, | |
| { | |
| "epoch": 0.9326091329712332, | |
| "grad_norm": 3.9776126177559155, | |
| "learning_rate": 7.008282249707065e-08, | |
| "loss": 0.4839, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 0.9330114665057333, | |
| "grad_norm": 4.069433607383545, | |
| "learning_rate": 6.925973342008897e-08, | |
| "loss": 0.453, | |
| "step": 2319 | |
| }, | |
| { | |
| "epoch": 0.9334138000402333, | |
| "grad_norm": 4.037972743715134, | |
| "learning_rate": 6.844143843397238e-08, | |
| "loss": 0.5427, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.9338161335747335, | |
| "grad_norm": 3.547048708842676, | |
| "learning_rate": 6.76279391526241e-08, | |
| "loss": 0.4561, | |
| "step": 2321 | |
| }, | |
| { | |
| "epoch": 0.9342184671092335, | |
| "grad_norm": 3.7831575419487256, | |
| "learning_rate": 6.681923718049016e-08, | |
| "loss": 0.5184, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 0.9346208006437337, | |
| "grad_norm": 3.7960605831677428, | |
| "learning_rate": 6.601533411255328e-08, | |
| "loss": 0.428, | |
| "step": 2323 | |
| }, | |
| { | |
| "epoch": 0.9350231341782338, | |
| "grad_norm": 3.699019611927255, | |
| "learning_rate": 6.52162315343327e-08, | |
| "loss": 0.4711, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 0.9354254677127338, | |
| "grad_norm": 3.7499929742301465, | |
| "learning_rate": 6.44219310218791e-08, | |
| "loss": 0.494, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.935827801247234, | |
| "grad_norm": 3.982088440613016, | |
| "learning_rate": 6.363243414177239e-08, | |
| "loss": 0.5013, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 0.936230134781734, | |
| "grad_norm": 3.5405567172339034, | |
| "learning_rate": 6.284774245111868e-08, | |
| "loss": 0.4151, | |
| "step": 2327 | |
| }, | |
| { | |
| "epoch": 0.9366324683162341, | |
| "grad_norm": 3.9069783277866894, | |
| "learning_rate": 6.206785749754613e-08, | |
| "loss": 0.5257, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 0.9370348018507343, | |
| "grad_norm": 4.560908800971963, | |
| "learning_rate": 6.12927808192032e-08, | |
| "loss": 0.5827, | |
| "step": 2329 | |
| }, | |
| { | |
| "epoch": 0.9374371353852343, | |
| "grad_norm": 3.6067893534884967, | |
| "learning_rate": 6.052251394475544e-08, | |
| "loss": 0.4533, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.9378394689197345, | |
| "grad_norm": 4.228506402107548, | |
| "learning_rate": 5.975705839338208e-08, | |
| "loss": 0.5367, | |
| "step": 2331 | |
| }, | |
| { | |
| "epoch": 0.9382418024542346, | |
| "grad_norm": 4.377928096595712, | |
| "learning_rate": 5.89964156747716e-08, | |
| "loss": 0.487, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 0.9386441359887346, | |
| "grad_norm": 4.001917632135577, | |
| "learning_rate": 5.8240587289122576e-08, | |
| "loss": 0.5991, | |
| "step": 2333 | |
| }, | |
| { | |
| "epoch": 0.9390464695232348, | |
| "grad_norm": 3.89102054624204, | |
| "learning_rate": 5.7489574727136734e-08, | |
| "loss": 0.5289, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 0.9394488030577348, | |
| "grad_norm": 3.9529925607216314, | |
| "learning_rate": 5.67433794700184e-08, | |
| "loss": 0.5516, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 0.939851136592235, | |
| "grad_norm": 3.9965015035833114, | |
| "learning_rate": 5.600200298947034e-08, | |
| "loss": 0.5758, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 0.9402534701267351, | |
| "grad_norm": 4.354462501990094, | |
| "learning_rate": 5.52654467476918e-08, | |
| "loss": 0.5793, | |
| "step": 2337 | |
| }, | |
| { | |
| "epoch": 0.9406558036612351, | |
| "grad_norm": 4.199766571714081, | |
| "learning_rate": 5.453371219737491e-08, | |
| "loss": 0.5688, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 0.9410581371957353, | |
| "grad_norm": 3.3551684650390454, | |
| "learning_rate": 5.380680078170275e-08, | |
| "loss": 0.3524, | |
| "step": 2339 | |
| }, | |
| { | |
| "epoch": 0.9414604707302353, | |
| "grad_norm": 4.408645627917106, | |
| "learning_rate": 5.3084713934344613e-08, | |
| "loss": 0.5442, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.9418628042647355, | |
| "grad_norm": 4.4356672674608895, | |
| "learning_rate": 5.236745307945545e-08, | |
| "loss": 0.5028, | |
| "step": 2341 | |
| }, | |
| { | |
| "epoch": 0.9422651377992356, | |
| "grad_norm": 3.46236072019421, | |
| "learning_rate": 5.165501963167174e-08, | |
| "loss": 0.3921, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 0.9426674713337356, | |
| "grad_norm": 4.4269078668736075, | |
| "learning_rate": 5.094741499610839e-08, | |
| "loss": 0.5983, | |
| "step": 2343 | |
| }, | |
| { | |
| "epoch": 0.9430698048682358, | |
| "grad_norm": 4.503787561410546, | |
| "learning_rate": 5.02446405683582e-08, | |
| "loss": 0.585, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 0.9434721384027359, | |
| "grad_norm": 3.658547311389223, | |
| "learning_rate": 4.9546697734485224e-08, | |
| "loss": 0.4762, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 0.943874471937236, | |
| "grad_norm": 4.353601396348682, | |
| "learning_rate": 4.885358787102584e-08, | |
| "loss": 0.6102, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 0.9442768054717361, | |
| "grad_norm": 4.143732790296102, | |
| "learning_rate": 4.816531234498406e-08, | |
| "loss": 0.6124, | |
| "step": 2347 | |
| }, | |
| { | |
| "epoch": 0.9446791390062361, | |
| "grad_norm": 4.3975559021632575, | |
| "learning_rate": 4.7481872513829575e-08, | |
| "loss": 0.5633, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 0.9450814725407363, | |
| "grad_norm": 4.066972538920918, | |
| "learning_rate": 4.680326972549387e-08, | |
| "loss": 0.5118, | |
| "step": 2349 | |
| }, | |
| { | |
| "epoch": 0.9454838060752364, | |
| "grad_norm": 4.03878765773838, | |
| "learning_rate": 4.6129505318369127e-08, | |
| "loss": 0.4778, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.9458861396097364, | |
| "grad_norm": 4.201321170733065, | |
| "learning_rate": 4.546058062130487e-08, | |
| "loss": 0.5128, | |
| "step": 2351 | |
| }, | |
| { | |
| "epoch": 0.9462884731442366, | |
| "grad_norm": 3.8094183727880395, | |
| "learning_rate": 4.4796496953605493e-08, | |
| "loss": 0.5441, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 0.9466908066787366, | |
| "grad_norm": 3.5299699606250834, | |
| "learning_rate": 4.413725562502691e-08, | |
| "loss": 0.4678, | |
| "step": 2353 | |
| }, | |
| { | |
| "epoch": 0.9470931402132368, | |
| "grad_norm": 3.9087226991955717, | |
| "learning_rate": 4.348285793577517e-08, | |
| "loss": 0.5329, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 0.9474954737477369, | |
| "grad_norm": 4.0121109992844755, | |
| "learning_rate": 4.2833305176503136e-08, | |
| "loss": 0.5837, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 0.9478978072822369, | |
| "grad_norm": 4.544145628732853, | |
| "learning_rate": 4.218859862830826e-08, | |
| "loss": 0.3829, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 0.9483001408167371, | |
| "grad_norm": 3.89076415048523, | |
| "learning_rate": 4.154873956272981e-08, | |
| "loss": 0.4691, | |
| "step": 2357 | |
| }, | |
| { | |
| "epoch": 0.9487024743512372, | |
| "grad_norm": 4.1725175451999, | |
| "learning_rate": 4.091372924174636e-08, | |
| "loss": 0.4885, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 0.9491048078857373, | |
| "grad_norm": 4.3450167781713285, | |
| "learning_rate": 4.028356891777385e-08, | |
| "loss": 0.5124, | |
| "step": 2359 | |
| }, | |
| { | |
| "epoch": 0.9495071414202374, | |
| "grad_norm": 4.4322372412295605, | |
| "learning_rate": 3.9658259833662284e-08, | |
| "loss": 0.5234, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.9499094749547374, | |
| "grad_norm": 3.707524532117256, | |
| "learning_rate": 3.903780322269374e-08, | |
| "loss": 0.4149, | |
| "step": 2361 | |
| }, | |
| { | |
| "epoch": 0.9503118084892376, | |
| "grad_norm": 5.065679790929374, | |
| "learning_rate": 3.842220030858018e-08, | |
| "loss": 0.688, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 0.9507141420237377, | |
| "grad_norm": 3.786912616101777, | |
| "learning_rate": 3.781145230546013e-08, | |
| "loss": 0.43, | |
| "step": 2363 | |
| }, | |
| { | |
| "epoch": 0.9511164755582378, | |
| "grad_norm": 3.6227665097152224, | |
| "learning_rate": 3.720556041789752e-08, | |
| "loss": 0.4382, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 0.9515188090927379, | |
| "grad_norm": 3.999125768684484, | |
| "learning_rate": 3.660452584087815e-08, | |
| "loss": 0.5716, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 0.9519211426272379, | |
| "grad_norm": 3.409457081881056, | |
| "learning_rate": 3.600834975980877e-08, | |
| "loss": 0.5407, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 0.9523234761617381, | |
| "grad_norm": 3.892658686650397, | |
| "learning_rate": 3.5417033350512444e-08, | |
| "loss": 0.4589, | |
| "step": 2367 | |
| }, | |
| { | |
| "epoch": 0.9527258096962382, | |
| "grad_norm": 4.000437062384583, | |
| "learning_rate": 3.483057777922905e-08, | |
| "loss": 0.4875, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 0.9531281432307382, | |
| "grad_norm": 4.046666707315621, | |
| "learning_rate": 3.42489842026103e-08, | |
| "loss": 0.5162, | |
| "step": 2369 | |
| }, | |
| { | |
| "epoch": 0.9535304767652384, | |
| "grad_norm": 4.290796216817974, | |
| "learning_rate": 3.3672253767719485e-08, | |
| "loss": 0.6233, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.9539328102997385, | |
| "grad_norm": 4.023014608954607, | |
| "learning_rate": 3.310038761202839e-08, | |
| "loss": 0.5062, | |
| "step": 2371 | |
| }, | |
| { | |
| "epoch": 0.9543351438342386, | |
| "grad_norm": 4.43210170452369, | |
| "learning_rate": 3.2533386863414784e-08, | |
| "loss": 0.5594, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 0.9547374773687387, | |
| "grad_norm": 3.8979482975499624, | |
| "learning_rate": 3.1971252640160545e-08, | |
| "loss": 0.5128, | |
| "step": 2373 | |
| }, | |
| { | |
| "epoch": 0.9551398109032387, | |
| "grad_norm": 4.162386221402383, | |
| "learning_rate": 3.141398605094992e-08, | |
| "loss": 0.5848, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 0.9555421444377389, | |
| "grad_norm": 4.022603602017269, | |
| "learning_rate": 3.086158819486651e-08, | |
| "loss": 0.5771, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.955944477972239, | |
| "grad_norm": 3.8920410241916863, | |
| "learning_rate": 3.031406016139077e-08, | |
| "loss": 0.5498, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 0.9563468115067391, | |
| "grad_norm": 4.084788242660989, | |
| "learning_rate": 2.9771403030399713e-08, | |
| "loss": 0.598, | |
| "step": 2377 | |
| }, | |
| { | |
| "epoch": 0.9567491450412392, | |
| "grad_norm": 4.048190861072185, | |
| "learning_rate": 2.923361787216278e-08, | |
| "loss": 0.4405, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 0.9571514785757392, | |
| "grad_norm": 4.502700834332798, | |
| "learning_rate": 2.8700705747340963e-08, | |
| "loss": 0.5169, | |
| "step": 2379 | |
| }, | |
| { | |
| "epoch": 0.9575538121102394, | |
| "grad_norm": 4.054509620797073, | |
| "learning_rate": 2.8172667706983792e-08, | |
| "loss": 0.53, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.9579561456447395, | |
| "grad_norm": 3.939484097293469, | |
| "learning_rate": 2.7649504792528204e-08, | |
| "loss": 0.5566, | |
| "step": 2381 | |
| }, | |
| { | |
| "epoch": 0.9583584791792396, | |
| "grad_norm": 4.299563624727568, | |
| "learning_rate": 2.7131218035796324e-08, | |
| "loss": 0.5603, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 0.9587608127137397, | |
| "grad_norm": 3.723520114625019, | |
| "learning_rate": 2.6617808458991867e-08, | |
| "loss": 0.4346, | |
| "step": 2383 | |
| }, | |
| { | |
| "epoch": 0.9591631462482398, | |
| "grad_norm": 3.667012045051573, | |
| "learning_rate": 2.6109277074700955e-08, | |
| "loss": 0.3638, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 0.9595654797827399, | |
| "grad_norm": 3.725825844080702, | |
| "learning_rate": 2.5605624885887414e-08, | |
| "loss": 0.4541, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 0.95996781331724, | |
| "grad_norm": 3.7094680890029776, | |
| "learning_rate": 2.5106852885892764e-08, | |
| "loss": 0.4693, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 0.96037014685174, | |
| "grad_norm": 3.7229547246513937, | |
| "learning_rate": 2.461296205843261e-08, | |
| "loss": 0.5144, | |
| "step": 2387 | |
| }, | |
| { | |
| "epoch": 0.9607724803862402, | |
| "grad_norm": 4.159612295768075, | |
| "learning_rate": 2.412395337759693e-08, | |
| "loss": 0.522, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 0.9611748139207403, | |
| "grad_norm": 3.8986140359372286, | |
| "learning_rate": 2.3639827807844796e-08, | |
| "loss": 0.5237, | |
| "step": 2389 | |
| }, | |
| { | |
| "epoch": 0.9615771474552404, | |
| "grad_norm": 4.121244015419092, | |
| "learning_rate": 2.3160586304006026e-08, | |
| "loss": 0.538, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.9619794809897405, | |
| "grad_norm": 4.166559231330322, | |
| "learning_rate": 2.268622981127705e-08, | |
| "loss": 0.5822, | |
| "step": 2391 | |
| }, | |
| { | |
| "epoch": 0.9623818145242405, | |
| "grad_norm": 3.9211071919172813, | |
| "learning_rate": 2.2216759265220044e-08, | |
| "loss": 0.4897, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 0.9627841480587407, | |
| "grad_norm": 4.349097825226008, | |
| "learning_rate": 2.1752175591760184e-08, | |
| "loss": 0.5211, | |
| "step": 2393 | |
| }, | |
| { | |
| "epoch": 0.9631864815932408, | |
| "grad_norm": 3.9282047251137677, | |
| "learning_rate": 2.1292479707184787e-08, | |
| "loss": 0.5741, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 0.9635888151277409, | |
| "grad_norm": 4.159989263744697, | |
| "learning_rate": 2.0837672518141395e-08, | |
| "loss": 0.5238, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 0.963991148662241, | |
| "grad_norm": 3.8198728228381293, | |
| "learning_rate": 2.0387754921634972e-08, | |
| "loss": 0.5144, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 0.9643934821967411, | |
| "grad_norm": 3.346255200741844, | |
| "learning_rate": 1.9942727805027373e-08, | |
| "loss": 0.4384, | |
| "step": 2397 | |
| }, | |
| { | |
| "epoch": 0.9647958157312412, | |
| "grad_norm": 3.8469019196516845, | |
| "learning_rate": 1.9502592046035107e-08, | |
| "loss": 0.486, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 0.9651981492657413, | |
| "grad_norm": 3.5776773638982893, | |
| "learning_rate": 1.9067348512726847e-08, | |
| "loss": 0.4414, | |
| "step": 2399 | |
| }, | |
| { | |
| "epoch": 0.9656004828002414, | |
| "grad_norm": 4.031486537261451, | |
| "learning_rate": 1.8636998063523705e-08, | |
| "loss": 0.5328, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.9660028163347415, | |
| "grad_norm": 4.260664576753229, | |
| "learning_rate": 1.8211541547195067e-08, | |
| "loss": 0.5136, | |
| "step": 2401 | |
| }, | |
| { | |
| "epoch": 0.9664051498692416, | |
| "grad_norm": 4.152186034058617, | |
| "learning_rate": 1.7790979802858876e-08, | |
| "loss": 0.6316, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 0.9668074834037417, | |
| "grad_norm": 3.995357070807909, | |
| "learning_rate": 1.7375313659978855e-08, | |
| "loss": 0.5377, | |
| "step": 2403 | |
| }, | |
| { | |
| "epoch": 0.9672098169382418, | |
| "grad_norm": 3.4651763373212474, | |
| "learning_rate": 1.6964543938363388e-08, | |
| "loss": 0.4157, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 0.9676121504727418, | |
| "grad_norm": 4.360198459376172, | |
| "learning_rate": 1.6558671448163866e-08, | |
| "loss": 0.6273, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 0.968014484007242, | |
| "grad_norm": 4.006498747343065, | |
| "learning_rate": 1.6157696989873017e-08, | |
| "loss": 0.4914, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 0.9684168175417421, | |
| "grad_norm": 3.7398733702546822, | |
| "learning_rate": 1.5761621354322965e-08, | |
| "loss": 0.4022, | |
| "step": 2407 | |
| }, | |
| { | |
| "epoch": 0.9688191510762422, | |
| "grad_norm": 4.1110217996882294, | |
| "learning_rate": 1.5370445322684392e-08, | |
| "loss": 0.4698, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 0.9692214846107423, | |
| "grad_norm": 4.362254470529691, | |
| "learning_rate": 1.498416966646432e-08, | |
| "loss": 0.5598, | |
| "step": 2409 | |
| }, | |
| { | |
| "epoch": 0.9696238181452425, | |
| "grad_norm": 3.8880330136972328, | |
| "learning_rate": 1.4602795147504734e-08, | |
| "loss": 0.473, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.9700261516797425, | |
| "grad_norm": 4.110874787245324, | |
| "learning_rate": 1.4226322517981728e-08, | |
| "loss": 0.4901, | |
| "step": 2411 | |
| }, | |
| { | |
| "epoch": 0.9704284852142426, | |
| "grad_norm": 4.138814644855194, | |
| "learning_rate": 1.3854752520403303e-08, | |
| "loss": 0.5618, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 0.9708308187487427, | |
| "grad_norm": 4.481371755440245, | |
| "learning_rate": 1.3488085887607972e-08, | |
| "loss": 0.6152, | |
| "step": 2413 | |
| }, | |
| { | |
| "epoch": 0.9712331522832428, | |
| "grad_norm": 3.6050373310270176, | |
| "learning_rate": 1.3126323342763648e-08, | |
| "loss": 0.5525, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 0.971635485817743, | |
| "grad_norm": 4.061084112826385, | |
| "learning_rate": 1.2769465599365982e-08, | |
| "loss": 0.4619, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 0.972037819352243, | |
| "grad_norm": 3.8920028399978097, | |
| "learning_rate": 1.24175133612367e-08, | |
| "loss": 0.4833, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 0.9724401528867431, | |
| "grad_norm": 3.909035344776143, | |
| "learning_rate": 1.2070467322522762e-08, | |
| "loss": 0.5188, | |
| "step": 2417 | |
| }, | |
| { | |
| "epoch": 0.9728424864212432, | |
| "grad_norm": 4.225663392112197, | |
| "learning_rate": 1.1728328167695258e-08, | |
| "loss": 0.6696, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 0.9732448199557433, | |
| "grad_norm": 4.6741890030042725, | |
| "learning_rate": 1.1391096571546633e-08, | |
| "loss": 0.5815, | |
| "step": 2419 | |
| }, | |
| { | |
| "epoch": 0.9736471534902434, | |
| "grad_norm": 4.51608876386972, | |
| "learning_rate": 1.1058773199190965e-08, | |
| "loss": 0.5173, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.9740494870247435, | |
| "grad_norm": 4.0654982873308665, | |
| "learning_rate": 1.0731358706061456e-08, | |
| "loss": 0.5749, | |
| "step": 2421 | |
| }, | |
| { | |
| "epoch": 0.9744518205592436, | |
| "grad_norm": 4.2304161552160835, | |
| "learning_rate": 1.0408853737909896e-08, | |
| "loss": 0.5813, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 0.9748541540937438, | |
| "grad_norm": 3.4742799346124493, | |
| "learning_rate": 1.0091258930805259e-08, | |
| "loss": 0.4705, | |
| "step": 2423 | |
| }, | |
| { | |
| "epoch": 0.9752564876282438, | |
| "grad_norm": 4.08672880848899, | |
| "learning_rate": 9.778574911132044e-09, | |
| "loss": 0.516, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 0.9756588211627439, | |
| "grad_norm": 3.669930620914309, | |
| "learning_rate": 9.470802295589442e-09, | |
| "loss": 0.4138, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.976061154697244, | |
| "grad_norm": 3.5860479619434673, | |
| "learning_rate": 9.16794169119023e-09, | |
| "loss": 0.4224, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 0.9764634882317441, | |
| "grad_norm": 4.642054584241187, | |
| "learning_rate": 8.869993695258816e-09, | |
| "loss": 0.611, | |
| "step": 2427 | |
| }, | |
| { | |
| "epoch": 0.9768658217662443, | |
| "grad_norm": 3.977155407637389, | |
| "learning_rate": 8.576958895431531e-09, | |
| "loss": 0.5681, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 0.9772681553007443, | |
| "grad_norm": 3.6884328747774857, | |
| "learning_rate": 8.28883786965301e-09, | |
| "loss": 0.4945, | |
| "step": 2429 | |
| }, | |
| { | |
| "epoch": 0.9776704888352444, | |
| "grad_norm": 4.023502331465119, | |
| "learning_rate": 8.005631186177864e-09, | |
| "loss": 0.4201, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.9780728223697445, | |
| "grad_norm": 3.901800973146699, | |
| "learning_rate": 7.727339403568179e-09, | |
| "loss": 0.4977, | |
| "step": 2431 | |
| }, | |
| { | |
| "epoch": 0.9784751559042446, | |
| "grad_norm": 3.9967038572140856, | |
| "learning_rate": 7.45396307069185e-09, | |
| "loss": 0.6418, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 0.9788774894387448, | |
| "grad_norm": 4.291073567899429, | |
| "learning_rate": 7.185502726722305e-09, | |
| "loss": 0.6601, | |
| "step": 2433 | |
| }, | |
| { | |
| "epoch": 0.9792798229732448, | |
| "grad_norm": 3.974019553234897, | |
| "learning_rate": 6.921958901137948e-09, | |
| "loss": 0.4781, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 0.9796821565077449, | |
| "grad_norm": 3.833029352112665, | |
| "learning_rate": 6.66333211371939e-09, | |
| "loss": 0.4373, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 0.9800844900422451, | |
| "grad_norm": 4.398810707509668, | |
| "learning_rate": 6.409622874550547e-09, | |
| "loss": 0.5795, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 0.9804868235767451, | |
| "grad_norm": 3.5697882170733557, | |
| "learning_rate": 6.160831684015878e-09, | |
| "loss": 0.398, | |
| "step": 2437 | |
| }, | |
| { | |
| "epoch": 0.9808891571112452, | |
| "grad_norm": 3.904433602202901, | |
| "learning_rate": 5.9169590328003735e-09, | |
| "loss": 0.5029, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 0.9812914906457453, | |
| "grad_norm": 3.7353800882065715, | |
| "learning_rate": 5.678005401888176e-09, | |
| "loss": 0.4911, | |
| "step": 2439 | |
| }, | |
| { | |
| "epoch": 0.9816938241802454, | |
| "grad_norm": 3.795949925181833, | |
| "learning_rate": 5.443971262561465e-09, | |
| "loss": 0.4405, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.9820961577147456, | |
| "grad_norm": 3.617609885039579, | |
| "learning_rate": 5.214857076400736e-09, | |
| "loss": 0.4329, | |
| "step": 2441 | |
| }, | |
| { | |
| "epoch": 0.9824984912492456, | |
| "grad_norm": 3.5249358490195637, | |
| "learning_rate": 4.990663295282305e-09, | |
| "loss": 0.4285, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 0.9829008247837457, | |
| "grad_norm": 4.142775577116706, | |
| "learning_rate": 4.771390361377748e-09, | |
| "loss": 0.4777, | |
| "step": 2443 | |
| }, | |
| { | |
| "epoch": 0.9833031583182458, | |
| "grad_norm": 4.312344250176257, | |
| "learning_rate": 4.5570387071544595e-09, | |
| "loss": 0.6627, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 0.9837054918527459, | |
| "grad_norm": 3.67386973137855, | |
| "learning_rate": 4.347608755372601e-09, | |
| "loss": 0.4431, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 0.9841078253872461, | |
| "grad_norm": 4.191733748165821, | |
| "learning_rate": 4.143100919086207e-09, | |
| "loss": 0.4884, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 0.9845101589217461, | |
| "grad_norm": 4.38016174030762, | |
| "learning_rate": 3.943515601640968e-09, | |
| "loss": 0.5822, | |
| "step": 2447 | |
| }, | |
| { | |
| "epoch": 0.9849124924562462, | |
| "grad_norm": 4.227081529941771, | |
| "learning_rate": 3.748853196673952e-09, | |
| "loss": 0.49, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 0.9853148259907464, | |
| "grad_norm": 3.6169704926991124, | |
| "learning_rate": 3.5591140881138797e-09, | |
| "loss": 0.5642, | |
| "step": 2449 | |
| }, | |
| { | |
| "epoch": 0.9857171595252464, | |
| "grad_norm": 3.228283450712886, | |
| "learning_rate": 3.374298650178076e-09, | |
| "loss": 0.3926, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.9861194930597466, | |
| "grad_norm": 3.996805720599477, | |
| "learning_rate": 3.194407247373854e-09, | |
| "loss": 0.5175, | |
| "step": 2451 | |
| }, | |
| { | |
| "epoch": 0.9865218265942466, | |
| "grad_norm": 3.786750548322366, | |
| "learning_rate": 3.0194402344965733e-09, | |
| "loss": 0.4496, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 0.9869241601287467, | |
| "grad_norm": 3.730366038034417, | |
| "learning_rate": 2.8493979566296403e-09, | |
| "loss": 0.4701, | |
| "step": 2453 | |
| }, | |
| { | |
| "epoch": 0.9873264936632469, | |
| "grad_norm": 4.616677722561612, | |
| "learning_rate": 2.6842807491433974e-09, | |
| "loss": 0.6207, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 0.9877288271977469, | |
| "grad_norm": 3.9983804243694645, | |
| "learning_rate": 2.524088937694291e-09, | |
| "loss": 0.4734, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 0.988131160732247, | |
| "grad_norm": 4.364062652259154, | |
| "learning_rate": 2.3688228382251487e-09, | |
| "loss": 0.6254, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 0.9885334942667471, | |
| "grad_norm": 3.97492888774164, | |
| "learning_rate": 2.218482756963236e-09, | |
| "loss": 0.5645, | |
| "step": 2457 | |
| }, | |
| { | |
| "epoch": 0.9889358278012472, | |
| "grad_norm": 3.9671569540217493, | |
| "learning_rate": 2.07306899042109e-09, | |
| "loss": 0.4821, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 0.9893381613357474, | |
| "grad_norm": 3.808293451273474, | |
| "learning_rate": 1.9325818253945748e-09, | |
| "loss": 0.4502, | |
| "step": 2459 | |
| }, | |
| { | |
| "epoch": 0.9897404948702474, | |
| "grad_norm": 4.467378355634954, | |
| "learning_rate": 1.7970215389628842e-09, | |
| "loss": 0.5963, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.9901428284047475, | |
| "grad_norm": 4.625199727103312, | |
| "learning_rate": 1.6663883984888163e-09, | |
| "loss": 0.6419, | |
| "step": 2461 | |
| }, | |
| { | |
| "epoch": 0.9905451619392477, | |
| "grad_norm": 3.9326948253065184, | |
| "learning_rate": 1.5406826616168324e-09, | |
| "loss": 0.5575, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 0.9909474954737477, | |
| "grad_norm": 4.4568921518738485, | |
| "learning_rate": 1.4199045762730569e-09, | |
| "loss": 0.6483, | |
| "step": 2463 | |
| }, | |
| { | |
| "epoch": 0.9913498290082479, | |
| "grad_norm": 4.351427492104435, | |
| "learning_rate": 1.3040543806658313e-09, | |
| "loss": 0.5948, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 0.9917521625427479, | |
| "grad_norm": 3.4818450271582857, | |
| "learning_rate": 1.1931323032832177e-09, | |
| "loss": 0.3535, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 0.992154496077248, | |
| "grad_norm": 4.11227816177031, | |
| "learning_rate": 1.0871385628941077e-09, | |
| "loss": 0.5317, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 0.9925568296117482, | |
| "grad_norm": 4.418916055074932, | |
| "learning_rate": 9.860733685479463e-10, | |
| "loss": 0.6433, | |
| "step": 2467 | |
| }, | |
| { | |
| "epoch": 0.9929591631462482, | |
| "grad_norm": 3.6128250845259724, | |
| "learning_rate": 8.899369195727869e-10, | |
| "loss": 0.5447, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 0.9933614966807484, | |
| "grad_norm": 4.364090118207612, | |
| "learning_rate": 7.987294055761264e-10, | |
| "loss": 0.6099, | |
| "step": 2469 | |
| }, | |
| { | |
| "epoch": 0.9937638302152484, | |
| "grad_norm": 3.739315819157321, | |
| "learning_rate": 7.124510064446255e-10, | |
| "loss": 0.4998, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.9941661637497485, | |
| "grad_norm": 4.035011038094296, | |
| "learning_rate": 6.311018923432776e-10, | |
| "loss": 0.4558, | |
| "step": 2471 | |
| }, | |
| { | |
| "epoch": 0.9945684972842487, | |
| "grad_norm": 3.577550936704116, | |
| "learning_rate": 5.546822237145755e-10, | |
| "loss": 0.4408, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 0.9949708308187487, | |
| "grad_norm": 3.7748701163123584, | |
| "learning_rate": 4.831921512796211e-10, | |
| "loss": 0.4403, | |
| "step": 2473 | |
| }, | |
| { | |
| "epoch": 0.9953731643532489, | |
| "grad_norm": 3.7403701320267686, | |
| "learning_rate": 4.166318160361837e-10, | |
| "loss": 0.4989, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 0.995775497887749, | |
| "grad_norm": 3.8787870872673857, | |
| "learning_rate": 3.5500134925980925e-10, | |
| "loss": 0.5066, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.996177831422249, | |
| "grad_norm": 4.321954937291901, | |
| "learning_rate": 2.983008725029879e-10, | |
| "loss": 0.498, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 0.9965801649567492, | |
| "grad_norm": 3.574339539948393, | |
| "learning_rate": 2.4653049759432167e-10, | |
| "loss": 0.4164, | |
| "step": 2477 | |
| }, | |
| { | |
| "epoch": 0.9969824984912492, | |
| "grad_norm": 3.7965194640316478, | |
| "learning_rate": 1.9969032663935683e-10, | |
| "loss": 0.4769, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 0.9973848320257493, | |
| "grad_norm": 4.23022008583803, | |
| "learning_rate": 1.5778045202002878e-10, | |
| "loss": 0.4481, | |
| "step": 2479 | |
| }, | |
| { | |
| "epoch": 0.9977871655602495, | |
| "grad_norm": 4.071535851310751, | |
| "learning_rate": 1.2080095639410704e-10, | |
| "loss": 0.535, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.9981894990947495, | |
| "grad_norm": 3.9007108087469646, | |
| "learning_rate": 8.875191269547279e-11, | |
| "loss": 0.542, | |
| "step": 2481 | |
| }, | |
| { | |
| "epoch": 0.9985918326292497, | |
| "grad_norm": 4.05416067842318, | |
| "learning_rate": 6.163338413328612e-11, | |
| "loss": 0.5738, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 0.9989941661637497, | |
| "grad_norm": 3.9020981788420386, | |
| "learning_rate": 3.944542419337394e-11, | |
| "loss": 0.4897, | |
| "step": 2483 | |
| }, | |
| { | |
| "epoch": 0.9993964996982498, | |
| "grad_norm": 3.6120331011996085, | |
| "learning_rate": 2.2188076636009415e-11, | |
| "loss": 0.5195, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 0.99979883323275, | |
| "grad_norm": 3.4028120769775385, | |
| "learning_rate": 9.861375497577375e-12, | |
| "loss": 0.3763, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 3.4028120769775385, | |
| "learning_rate": 2.4653450900191666e-12, | |
| "loss": 0.4064, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 2486, | |
| "total_flos": 882515131957248.0, | |
| "train_loss": 0.5852410430066162, | |
| "train_runtime": 194010.5784, | |
| "train_samples_per_second": 1.64, | |
| "train_steps_per_second": 0.013 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 2486, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 882515131957248.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |