{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 5676, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003524229074889868, "grad_norm": 1.6512674233185107, "learning_rate": 0.0, "loss": 1.493973731994629, "step": 1 }, { "epoch": 0.0007048458149779736, "grad_norm": 1.4463228571593894, "learning_rate": 7.042253521126761e-08, "loss": 1.3692013025283813, "step": 2 }, { "epoch": 0.0010572687224669603, "grad_norm": 1.4036766254408197, "learning_rate": 1.4084507042253522e-07, "loss": 1.3996260166168213, "step": 3 }, { "epoch": 0.0014096916299559472, "grad_norm": 1.29446596506829, "learning_rate": 2.1126760563380284e-07, "loss": 1.3011515140533447, "step": 4 }, { "epoch": 0.001762114537444934, "grad_norm": 1.5130555881795185, "learning_rate": 2.8169014084507043e-07, "loss": 1.3736083507537842, "step": 5 }, { "epoch": 0.0021145374449339205, "grad_norm": 1.247517750517551, "learning_rate": 3.521126760563381e-07, "loss": 1.051241159439087, "step": 6 }, { "epoch": 0.0024669603524229075, "grad_norm": 1.611437944890658, "learning_rate": 4.225352112676057e-07, "loss": 1.2594621181488037, "step": 7 }, { "epoch": 0.0028193832599118945, "grad_norm": 1.4604380967241444, "learning_rate": 4.929577464788733e-07, "loss": 1.0498416423797607, "step": 8 }, { "epoch": 0.003171806167400881, "grad_norm": 1.367174801368101, "learning_rate": 5.633802816901409e-07, "loss": 1.3313459157943726, "step": 9 }, { "epoch": 0.003524229074889868, "grad_norm": 1.4378623823320218, "learning_rate": 6.338028169014085e-07, "loss": 1.2484922409057617, "step": 10 }, { "epoch": 0.0038766519823788545, "grad_norm": 1.197911167360161, "learning_rate": 7.042253521126762e-07, "loss": 1.097194790840149, "step": 11 }, { "epoch": 0.004229074889867841, "grad_norm": 1.3767897701080816, "learning_rate": 7.746478873239437e-07, "loss": 1.3065136671066284, "step": 12 }, { "epoch": 0.0045814977973568285, "grad_norm": 1.2501177622273731, "learning_rate": 8.450704225352114e-07, "loss": 1.1574026346206665, "step": 13 }, { "epoch": 0.004933920704845815, "grad_norm": 1.3002699887597202, "learning_rate": 9.154929577464789e-07, "loss": 1.1509445905685425, "step": 14 }, { "epoch": 0.0052863436123348016, "grad_norm": 1.3458236321153771, "learning_rate": 9.859154929577465e-07, "loss": 1.069403886795044, "step": 15 }, { "epoch": 0.005638766519823789, "grad_norm": 1.52712721337833, "learning_rate": 1.0563380281690142e-06, "loss": 1.1731287240982056, "step": 16 }, { "epoch": 0.0059911894273127755, "grad_norm": 1.5628075837505453, "learning_rate": 1.1267605633802817e-06, "loss": 0.9314254522323608, "step": 17 }, { "epoch": 0.006343612334801762, "grad_norm": 1.3686084350519343, "learning_rate": 1.1971830985915492e-06, "loss": 1.2915008068084717, "step": 18 }, { "epoch": 0.006696035242290749, "grad_norm": 1.2653916141417434, "learning_rate": 1.267605633802817e-06, "loss": 1.1088309288024902, "step": 19 }, { "epoch": 0.007048458149779736, "grad_norm": 1.362753082153478, "learning_rate": 1.3380281690140844e-06, "loss": 1.21511709690094, "step": 20 }, { "epoch": 0.0074008810572687225, "grad_norm": 1.3054604275805306, "learning_rate": 1.4084507042253523e-06, "loss": 1.241409420967102, "step": 21 }, { "epoch": 0.007753303964757709, "grad_norm": 1.3646723208790772, "learning_rate": 1.4788732394366198e-06, "loss": 1.2170014381408691, "step": 22 }, { "epoch": 0.008105726872246696, "grad_norm": 1.424586503093174, "learning_rate": 1.5492957746478873e-06, "loss": 1.1405870914459229, "step": 23 }, { "epoch": 0.008458149779735682, "grad_norm": 1.429368633092772, "learning_rate": 1.6197183098591552e-06, "loss": 1.122542381286621, "step": 24 }, { "epoch": 0.00881057268722467, "grad_norm": 1.2201478884239083, "learning_rate": 1.6901408450704227e-06, "loss": 1.1686937808990479, "step": 25 }, { "epoch": 0.009162995594713657, "grad_norm": 1.4065678272985154, "learning_rate": 1.7605633802816902e-06, "loss": 1.215955376625061, "step": 26 }, { "epoch": 0.009515418502202643, "grad_norm": 1.3879787249393913, "learning_rate": 1.8309859154929579e-06, "loss": 1.075179100036621, "step": 27 }, { "epoch": 0.00986784140969163, "grad_norm": 1.2313632017619234, "learning_rate": 1.9014084507042254e-06, "loss": 1.198237419128418, "step": 28 }, { "epoch": 0.010220264317180617, "grad_norm": 1.6833211669458825, "learning_rate": 1.971830985915493e-06, "loss": 1.2356700897216797, "step": 29 }, { "epoch": 0.010572687224669603, "grad_norm": 1.3637967517131555, "learning_rate": 2.0422535211267608e-06, "loss": 1.2373592853546143, "step": 30 }, { "epoch": 0.01092511013215859, "grad_norm": 1.377232613936239, "learning_rate": 2.1126760563380285e-06, "loss": 1.1857718229293823, "step": 31 }, { "epoch": 0.011277533039647578, "grad_norm": 1.3566319214936433, "learning_rate": 2.1830985915492958e-06, "loss": 1.1844017505645752, "step": 32 }, { "epoch": 0.011629955947136564, "grad_norm": 1.2486508447822717, "learning_rate": 2.2535211267605635e-06, "loss": 1.275226354598999, "step": 33 }, { "epoch": 0.011982378854625551, "grad_norm": 1.3044888735575617, "learning_rate": 2.323943661971831e-06, "loss": 1.169473648071289, "step": 34 }, { "epoch": 0.012334801762114538, "grad_norm": 1.2608655384056326, "learning_rate": 2.3943661971830984e-06, "loss": 1.2182841300964355, "step": 35 }, { "epoch": 0.012687224669603524, "grad_norm": 1.3780698009940295, "learning_rate": 2.4647887323943666e-06, "loss": 1.2110469341278076, "step": 36 }, { "epoch": 0.01303964757709251, "grad_norm": 1.3829042894220551, "learning_rate": 2.535211267605634e-06, "loss": 1.2886571884155273, "step": 37 }, { "epoch": 0.013392070484581497, "grad_norm": 1.2954566526081723, "learning_rate": 2.6056338028169015e-06, "loss": 1.0740901231765747, "step": 38 }, { "epoch": 0.013744493392070485, "grad_norm": 1.2079072281757672, "learning_rate": 2.676056338028169e-06, "loss": 1.0119279623031616, "step": 39 }, { "epoch": 0.014096916299559472, "grad_norm": 1.1460333237155051, "learning_rate": 2.746478873239437e-06, "loss": 1.0752044916152954, "step": 40 }, { "epoch": 0.014449339207048459, "grad_norm": 1.3690776364650978, "learning_rate": 2.8169014084507046e-06, "loss": 1.345343828201294, "step": 41 }, { "epoch": 0.014801762114537445, "grad_norm": 1.0813865739605455, "learning_rate": 2.887323943661972e-06, "loss": 1.102332353591919, "step": 42 }, { "epoch": 0.015154185022026432, "grad_norm": 1.1643083589428873, "learning_rate": 2.9577464788732396e-06, "loss": 1.006919264793396, "step": 43 }, { "epoch": 0.015506607929515418, "grad_norm": 1.1582412568670832, "learning_rate": 3.0281690140845073e-06, "loss": 1.104026436805725, "step": 44 }, { "epoch": 0.015859030837004406, "grad_norm": 1.3060563783851553, "learning_rate": 3.0985915492957746e-06, "loss": 1.299152135848999, "step": 45 }, { "epoch": 0.01621145374449339, "grad_norm": 1.4304085919726754, "learning_rate": 3.1690140845070427e-06, "loss": 1.1075072288513184, "step": 46 }, { "epoch": 0.01656387665198238, "grad_norm": 0.9865545367526579, "learning_rate": 3.2394366197183104e-06, "loss": 1.0296107530593872, "step": 47 }, { "epoch": 0.016916299559471364, "grad_norm": 1.1960961939132708, "learning_rate": 3.3098591549295777e-06, "loss": 1.1097803115844727, "step": 48 }, { "epoch": 0.017268722466960353, "grad_norm": 1.0974682037636356, "learning_rate": 3.3802816901408454e-06, "loss": 0.945678174495697, "step": 49 }, { "epoch": 0.01762114537444934, "grad_norm": 0.9924343523024514, "learning_rate": 3.4507042253521127e-06, "loss": 1.075556993484497, "step": 50 }, { "epoch": 0.017973568281938326, "grad_norm": 1.0849849170905757, "learning_rate": 3.5211267605633804e-06, "loss": 1.0790367126464844, "step": 51 }, { "epoch": 0.018325991189427314, "grad_norm": 1.220415189867698, "learning_rate": 3.5915492957746485e-06, "loss": 1.2567799091339111, "step": 52 }, { "epoch": 0.0186784140969163, "grad_norm": 1.1058732491316554, "learning_rate": 3.6619718309859158e-06, "loss": 1.1437780857086182, "step": 53 }, { "epoch": 0.019030837004405287, "grad_norm": 1.0871981925234313, "learning_rate": 3.7323943661971835e-06, "loss": 1.0962307453155518, "step": 54 }, { "epoch": 0.019383259911894272, "grad_norm": 0.9603250960542756, "learning_rate": 3.8028169014084508e-06, "loss": 1.0149122476577759, "step": 55 }, { "epoch": 0.01973568281938326, "grad_norm": 0.9630324155849409, "learning_rate": 3.873239436619718e-06, "loss": 0.9029096364974976, "step": 56 }, { "epoch": 0.02008810572687225, "grad_norm": 1.1449327271146603, "learning_rate": 3.943661971830986e-06, "loss": 1.1290819644927979, "step": 57 }, { "epoch": 0.020440528634361233, "grad_norm": 1.1046082203063978, "learning_rate": 4.014084507042254e-06, "loss": 1.0965365171432495, "step": 58 }, { "epoch": 0.02079295154185022, "grad_norm": 1.2553158733514387, "learning_rate": 4.0845070422535216e-06, "loss": 1.2854020595550537, "step": 59 }, { "epoch": 0.021145374449339206, "grad_norm": 1.0484971235480365, "learning_rate": 4.154929577464789e-06, "loss": 1.0303996801376343, "step": 60 }, { "epoch": 0.021497797356828195, "grad_norm": 0.9670460326314384, "learning_rate": 4.225352112676057e-06, "loss": 1.0811198949813843, "step": 61 }, { "epoch": 0.02185022026431718, "grad_norm": 0.992548164971829, "learning_rate": 4.295774647887324e-06, "loss": 1.1373648643493652, "step": 62 }, { "epoch": 0.022202643171806168, "grad_norm": 1.009220008285868, "learning_rate": 4.3661971830985915e-06, "loss": 0.8717563152313232, "step": 63 }, { "epoch": 0.022555066079295156, "grad_norm": 0.9171432664885892, "learning_rate": 4.43661971830986e-06, "loss": 0.9939290881156921, "step": 64 }, { "epoch": 0.02290748898678414, "grad_norm": 1.139248361968882, "learning_rate": 4.507042253521127e-06, "loss": 1.1776926517486572, "step": 65 }, { "epoch": 0.02325991189427313, "grad_norm": 0.8971048282009709, "learning_rate": 4.577464788732395e-06, "loss": 0.9149726629257202, "step": 66 }, { "epoch": 0.023612334801762114, "grad_norm": 0.9597323965843616, "learning_rate": 4.647887323943662e-06, "loss": 0.996609091758728, "step": 67 }, { "epoch": 0.023964757709251102, "grad_norm": 1.0074979173506051, "learning_rate": 4.71830985915493e-06, "loss": 1.102593183517456, "step": 68 }, { "epoch": 0.024317180616740087, "grad_norm": 0.8938780612317906, "learning_rate": 4.788732394366197e-06, "loss": 1.0912048816680908, "step": 69 }, { "epoch": 0.024669603524229075, "grad_norm": 0.936561005612989, "learning_rate": 4.859154929577465e-06, "loss": 1.1192498207092285, "step": 70 }, { "epoch": 0.025022026431718063, "grad_norm": 1.059387656590118, "learning_rate": 4.929577464788733e-06, "loss": 1.0358459949493408, "step": 71 }, { "epoch": 0.025374449339207048, "grad_norm": 0.9588756664450253, "learning_rate": 5e-06, "loss": 1.076169490814209, "step": 72 }, { "epoch": 0.025726872246696036, "grad_norm": 1.355361750045824, "learning_rate": 5.070422535211268e-06, "loss": 0.9906084537506104, "step": 73 }, { "epoch": 0.02607929515418502, "grad_norm": 1.187443908189842, "learning_rate": 5.140845070422536e-06, "loss": 0.8163654804229736, "step": 74 }, { "epoch": 0.02643171806167401, "grad_norm": 0.9764403954844053, "learning_rate": 5.211267605633803e-06, "loss": 1.140099048614502, "step": 75 }, { "epoch": 0.026784140969162994, "grad_norm": 0.863156257130764, "learning_rate": 5.28169014084507e-06, "loss": 0.7654916048049927, "step": 76 }, { "epoch": 0.027136563876651983, "grad_norm": 1.0935626721226286, "learning_rate": 5.352112676056338e-06, "loss": 0.9476499557495117, "step": 77 }, { "epoch": 0.02748898678414097, "grad_norm": 1.0875377862843238, "learning_rate": 5.422535211267607e-06, "loss": 1.120811939239502, "step": 78 }, { "epoch": 0.027841409691629956, "grad_norm": 0.9578356099138406, "learning_rate": 5.492957746478874e-06, "loss": 0.9745736122131348, "step": 79 }, { "epoch": 0.028193832599118944, "grad_norm": 1.074452545035149, "learning_rate": 5.563380281690142e-06, "loss": 1.086181879043579, "step": 80 }, { "epoch": 0.02854625550660793, "grad_norm": 0.9510910839018534, "learning_rate": 5.633802816901409e-06, "loss": 0.9904681444168091, "step": 81 }, { "epoch": 0.028898678414096917, "grad_norm": 1.0958191892945044, "learning_rate": 5.7042253521126766e-06, "loss": 1.0311436653137207, "step": 82 }, { "epoch": 0.029251101321585902, "grad_norm": 1.066849780091366, "learning_rate": 5.774647887323944e-06, "loss": 0.996998131275177, "step": 83 }, { "epoch": 0.02960352422907489, "grad_norm": 1.1397250117300832, "learning_rate": 5.845070422535212e-06, "loss": 1.1526594161987305, "step": 84 }, { "epoch": 0.029955947136563875, "grad_norm": 1.145115830862378, "learning_rate": 5.915492957746479e-06, "loss": 1.0914695262908936, "step": 85 }, { "epoch": 0.030308370044052863, "grad_norm": 1.1551265503738541, "learning_rate": 5.9859154929577465e-06, "loss": 0.9558745622634888, "step": 86 }, { "epoch": 0.03066079295154185, "grad_norm": 1.0734015706063305, "learning_rate": 6.056338028169015e-06, "loss": 0.9668983221054077, "step": 87 }, { "epoch": 0.031013215859030836, "grad_norm": 1.2231010171085557, "learning_rate": 6.126760563380282e-06, "loss": 1.0132758617401123, "step": 88 }, { "epoch": 0.03136563876651982, "grad_norm": 0.9911917466596063, "learning_rate": 6.197183098591549e-06, "loss": 1.0816935300827026, "step": 89 }, { "epoch": 0.03171806167400881, "grad_norm": 1.1311803004782939, "learning_rate": 6.267605633802818e-06, "loss": 1.03245210647583, "step": 90 }, { "epoch": 0.0320704845814978, "grad_norm": 1.0820347157611818, "learning_rate": 6.3380281690140855e-06, "loss": 0.9812602400779724, "step": 91 }, { "epoch": 0.03242290748898678, "grad_norm": 1.0039329578342324, "learning_rate": 6.408450704225353e-06, "loss": 0.9303219318389893, "step": 92 }, { "epoch": 0.032775330396475774, "grad_norm": 1.0308477549900932, "learning_rate": 6.478873239436621e-06, "loss": 1.175403356552124, "step": 93 }, { "epoch": 0.03312775330396476, "grad_norm": 1.0822064194108554, "learning_rate": 6.549295774647888e-06, "loss": 1.1863958835601807, "step": 94 }, { "epoch": 0.033480176211453744, "grad_norm": 1.0560683839166303, "learning_rate": 6.619718309859155e-06, "loss": 0.8630557060241699, "step": 95 }, { "epoch": 0.03383259911894273, "grad_norm": 1.1683225259655636, "learning_rate": 6.690140845070423e-06, "loss": 1.0499619245529175, "step": 96 }, { "epoch": 0.03418502202643172, "grad_norm": 0.9766019012274652, "learning_rate": 6.760563380281691e-06, "loss": 0.9443086981773376, "step": 97 }, { "epoch": 0.034537444933920705, "grad_norm": 1.1779098792527396, "learning_rate": 6.830985915492958e-06, "loss": 1.0011450052261353, "step": 98 }, { "epoch": 0.03488986784140969, "grad_norm": 1.0941166094999715, "learning_rate": 6.901408450704225e-06, "loss": 1.0239083766937256, "step": 99 }, { "epoch": 0.03524229074889868, "grad_norm": 1.1605003575433563, "learning_rate": 6.9718309859154935e-06, "loss": 1.1335347890853882, "step": 100 }, { "epoch": 0.035594713656387666, "grad_norm": 1.1410420869639502, "learning_rate": 7.042253521126761e-06, "loss": 0.9650854468345642, "step": 101 }, { "epoch": 0.03594713656387665, "grad_norm": 1.0366491839089684, "learning_rate": 7.112676056338029e-06, "loss": 0.9284406900405884, "step": 102 }, { "epoch": 0.036299559471365636, "grad_norm": 1.0368314441443032, "learning_rate": 7.183098591549297e-06, "loss": 0.989676296710968, "step": 103 }, { "epoch": 0.03665198237885463, "grad_norm": 1.0475480945800932, "learning_rate": 7.253521126760564e-06, "loss": 0.9149842262268066, "step": 104 }, { "epoch": 0.03700440528634361, "grad_norm": 1.0115568298427282, "learning_rate": 7.3239436619718316e-06, "loss": 0.9793657064437866, "step": 105 }, { "epoch": 0.0373568281938326, "grad_norm": 1.0923401558071288, "learning_rate": 7.3943661971831e-06, "loss": 0.9508543014526367, "step": 106 }, { "epoch": 0.03770925110132159, "grad_norm": 1.123373083563155, "learning_rate": 7.464788732394367e-06, "loss": 1.0623283386230469, "step": 107 }, { "epoch": 0.038061674008810574, "grad_norm": 1.0472469474411819, "learning_rate": 7.535211267605634e-06, "loss": 1.0039314031600952, "step": 108 }, { "epoch": 0.03841409691629956, "grad_norm": 1.1301029490219276, "learning_rate": 7.6056338028169015e-06, "loss": 0.9315502643585205, "step": 109 }, { "epoch": 0.038766519823788544, "grad_norm": 0.9958020122553335, "learning_rate": 7.67605633802817e-06, "loss": 0.936677098274231, "step": 110 }, { "epoch": 0.039118942731277535, "grad_norm": 1.0684105284421879, "learning_rate": 7.746478873239436e-06, "loss": 0.9373410940170288, "step": 111 }, { "epoch": 0.03947136563876652, "grad_norm": 1.155598878121798, "learning_rate": 7.816901408450704e-06, "loss": 1.01617431640625, "step": 112 }, { "epoch": 0.039823788546255505, "grad_norm": 1.1889006080727076, "learning_rate": 7.887323943661972e-06, "loss": 1.1713547706604004, "step": 113 }, { "epoch": 0.0401762114537445, "grad_norm": 1.1256223667919436, "learning_rate": 7.95774647887324e-06, "loss": 0.8982350826263428, "step": 114 }, { "epoch": 0.04052863436123348, "grad_norm": 1.0914199985412718, "learning_rate": 8.028169014084509e-06, "loss": 0.8123869895935059, "step": 115 }, { "epoch": 0.040881057268722466, "grad_norm": 1.1505365914239516, "learning_rate": 8.098591549295775e-06, "loss": 1.0762536525726318, "step": 116 }, { "epoch": 0.04123348017621145, "grad_norm": 1.0367170014557934, "learning_rate": 8.169014084507043e-06, "loss": 1.004841923713684, "step": 117 }, { "epoch": 0.04158590308370044, "grad_norm": 1.0966724197265187, "learning_rate": 8.239436619718311e-06, "loss": 0.9237936735153198, "step": 118 }, { "epoch": 0.04193832599118943, "grad_norm": 1.0785540239343763, "learning_rate": 8.309859154929578e-06, "loss": 0.9038913249969482, "step": 119 }, { "epoch": 0.04229074889867841, "grad_norm": 1.1784096317090726, "learning_rate": 8.380281690140846e-06, "loss": 0.9488446712493896, "step": 120 }, { "epoch": 0.042643171806167404, "grad_norm": 1.1559534491366574, "learning_rate": 8.450704225352114e-06, "loss": 1.0862706899642944, "step": 121 }, { "epoch": 0.04299559471365639, "grad_norm": 1.5143452874154766, "learning_rate": 8.52112676056338e-06, "loss": 0.8882313966751099, "step": 122 }, { "epoch": 0.043348017621145374, "grad_norm": 1.1412568707979918, "learning_rate": 8.591549295774648e-06, "loss": 0.9125900268554688, "step": 123 }, { "epoch": 0.04370044052863436, "grad_norm": 1.403727281403332, "learning_rate": 8.661971830985915e-06, "loss": 0.944568395614624, "step": 124 }, { "epoch": 0.04405286343612335, "grad_norm": 1.2993905510610635, "learning_rate": 8.732394366197183e-06, "loss": 0.9303089380264282, "step": 125 }, { "epoch": 0.044405286343612335, "grad_norm": 1.1184314169128153, "learning_rate": 8.802816901408451e-06, "loss": 1.0983362197875977, "step": 126 }, { "epoch": 0.04475770925110132, "grad_norm": 1.40811546312751, "learning_rate": 8.87323943661972e-06, "loss": 1.002477765083313, "step": 127 }, { "epoch": 0.04511013215859031, "grad_norm": 1.1638063617076078, "learning_rate": 8.943661971830987e-06, "loss": 0.9994120001792908, "step": 128 }, { "epoch": 0.045462555066079297, "grad_norm": 1.2118035451866538, "learning_rate": 9.014084507042254e-06, "loss": 1.0785832405090332, "step": 129 }, { "epoch": 0.04581497797356828, "grad_norm": 1.0820277493757582, "learning_rate": 9.084507042253522e-06, "loss": 0.779441237449646, "step": 130 }, { "epoch": 0.046167400881057266, "grad_norm": 1.1766256779195974, "learning_rate": 9.15492957746479e-06, "loss": 1.0052348375320435, "step": 131 }, { "epoch": 0.04651982378854626, "grad_norm": 1.0771619013639089, "learning_rate": 9.225352112676057e-06, "loss": 1.0327996015548706, "step": 132 }, { "epoch": 0.04687224669603524, "grad_norm": 1.501276619683034, "learning_rate": 9.295774647887325e-06, "loss": 1.0643246173858643, "step": 133 }, { "epoch": 0.04722466960352423, "grad_norm": 1.1427145785080848, "learning_rate": 9.366197183098593e-06, "loss": 0.8449216485023499, "step": 134 }, { "epoch": 0.04757709251101322, "grad_norm": 1.2684019730338143, "learning_rate": 9.43661971830986e-06, "loss": 0.8867055177688599, "step": 135 }, { "epoch": 0.047929515418502204, "grad_norm": 1.4156875615017863, "learning_rate": 9.507042253521127e-06, "loss": 1.048499584197998, "step": 136 }, { "epoch": 0.04828193832599119, "grad_norm": 1.2120768691141688, "learning_rate": 9.577464788732394e-06, "loss": 1.0548617839813232, "step": 137 }, { "epoch": 0.048634361233480174, "grad_norm": 1.0679337780928526, "learning_rate": 9.647887323943664e-06, "loss": 0.8882845044136047, "step": 138 }, { "epoch": 0.048986784140969165, "grad_norm": 1.622342973826323, "learning_rate": 9.71830985915493e-06, "loss": 1.032647967338562, "step": 139 }, { "epoch": 0.04933920704845815, "grad_norm": 1.058782348686911, "learning_rate": 9.788732394366198e-06, "loss": 1.039523959159851, "step": 140 }, { "epoch": 0.049691629955947135, "grad_norm": 1.1234982994751406, "learning_rate": 9.859154929577466e-06, "loss": 0.8451036214828491, "step": 141 }, { "epoch": 0.05004405286343613, "grad_norm": 1.1376284406077708, "learning_rate": 9.929577464788733e-06, "loss": 0.8285897970199585, "step": 142 }, { "epoch": 0.05039647577092511, "grad_norm": 1.2057497016168632, "learning_rate": 1e-05, "loss": 0.9998278021812439, "step": 143 }, { "epoch": 0.050748898678414096, "grad_norm": 1.4528082359287422, "learning_rate": 1.0070422535211269e-05, "loss": 0.9782301187515259, "step": 144 }, { "epoch": 0.05110132158590308, "grad_norm": 1.1663459671948497, "learning_rate": 1.0140845070422535e-05, "loss": 1.0557070970535278, "step": 145 }, { "epoch": 0.05145374449339207, "grad_norm": 1.159146071512081, "learning_rate": 1.0211267605633803e-05, "loss": 0.9516133069992065, "step": 146 }, { "epoch": 0.05180616740088106, "grad_norm": 1.1694686158556986, "learning_rate": 1.0281690140845072e-05, "loss": 0.8965041637420654, "step": 147 }, { "epoch": 0.05215859030837004, "grad_norm": 1.2713520268346183, "learning_rate": 1.0352112676056338e-05, "loss": 0.8627057075500488, "step": 148 }, { "epoch": 0.052511013215859034, "grad_norm": 1.0456048049111641, "learning_rate": 1.0422535211267606e-05, "loss": 0.7627567648887634, "step": 149 }, { "epoch": 0.05286343612334802, "grad_norm": 1.2332422487154633, "learning_rate": 1.0492957746478873e-05, "loss": 0.8522504568099976, "step": 150 }, { "epoch": 0.053215859030837004, "grad_norm": 1.1106957565365498, "learning_rate": 1.056338028169014e-05, "loss": 0.7164312601089478, "step": 151 }, { "epoch": 0.05356828193832599, "grad_norm": 1.0487512934158103, "learning_rate": 1.0633802816901409e-05, "loss": 0.9141941070556641, "step": 152 }, { "epoch": 0.05392070484581498, "grad_norm": 1.5228596875919753, "learning_rate": 1.0704225352112675e-05, "loss": 0.9145504832267761, "step": 153 }, { "epoch": 0.054273127753303965, "grad_norm": 1.19745569358961, "learning_rate": 1.0774647887323943e-05, "loss": 0.9851646423339844, "step": 154 }, { "epoch": 0.05462555066079295, "grad_norm": 1.1547769204431162, "learning_rate": 1.0845070422535213e-05, "loss": 0.9319474697113037, "step": 155 }, { "epoch": 0.05497797356828194, "grad_norm": 1.242055483054837, "learning_rate": 1.0915492957746481e-05, "loss": 0.995783269405365, "step": 156 }, { "epoch": 0.05533039647577093, "grad_norm": 1.270129466753014, "learning_rate": 1.0985915492957748e-05, "loss": 0.8636226654052734, "step": 157 }, { "epoch": 0.05568281938325991, "grad_norm": 1.26388911778751, "learning_rate": 1.1056338028169016e-05, "loss": 0.8860869407653809, "step": 158 }, { "epoch": 0.056035242290748896, "grad_norm": 1.24911279543244, "learning_rate": 1.1126760563380284e-05, "loss": 0.9256196618080139, "step": 159 }, { "epoch": 0.05638766519823789, "grad_norm": 1.1684473229538663, "learning_rate": 1.119718309859155e-05, "loss": 0.8217915296554565, "step": 160 }, { "epoch": 0.05674008810572687, "grad_norm": 1.2938227991615623, "learning_rate": 1.1267605633802819e-05, "loss": 0.9808465838432312, "step": 161 }, { "epoch": 0.05709251101321586, "grad_norm": 1.2234654171305366, "learning_rate": 1.1338028169014087e-05, "loss": 0.7733014822006226, "step": 162 }, { "epoch": 0.05744493392070485, "grad_norm": 1.1428802626649461, "learning_rate": 1.1408450704225353e-05, "loss": 0.8581304550170898, "step": 163 }, { "epoch": 0.057797356828193834, "grad_norm": 1.3252890457476052, "learning_rate": 1.1478873239436621e-05, "loss": 0.9242054224014282, "step": 164 }, { "epoch": 0.05814977973568282, "grad_norm": 1.3695567443378234, "learning_rate": 1.1549295774647888e-05, "loss": 1.0302021503448486, "step": 165 }, { "epoch": 0.058502202643171804, "grad_norm": 1.2950143159958714, "learning_rate": 1.1619718309859156e-05, "loss": 0.8954275846481323, "step": 166 }, { "epoch": 0.058854625550660795, "grad_norm": 1.1779404187828553, "learning_rate": 1.1690140845070424e-05, "loss": 0.891846776008606, "step": 167 }, { "epoch": 0.05920704845814978, "grad_norm": 1.1837706775348158, "learning_rate": 1.176056338028169e-05, "loss": 0.887005627155304, "step": 168 }, { "epoch": 0.059559471365638765, "grad_norm": 1.289448297537656, "learning_rate": 1.1830985915492958e-05, "loss": 0.9020301103591919, "step": 169 }, { "epoch": 0.05991189427312775, "grad_norm": 1.2185831955131692, "learning_rate": 1.1901408450704227e-05, "loss": 0.7925454378128052, "step": 170 }, { "epoch": 0.06026431718061674, "grad_norm": 1.31750363404193, "learning_rate": 1.1971830985915493e-05, "loss": 0.8058332800865173, "step": 171 }, { "epoch": 0.060616740088105726, "grad_norm": 1.2435062872951204, "learning_rate": 1.2042253521126761e-05, "loss": 0.892992377281189, "step": 172 }, { "epoch": 0.06096916299559471, "grad_norm": 1.0835922361658872, "learning_rate": 1.211267605633803e-05, "loss": 0.8482734560966492, "step": 173 }, { "epoch": 0.0613215859030837, "grad_norm": 1.2806384537102478, "learning_rate": 1.2183098591549296e-05, "loss": 0.8652878999710083, "step": 174 }, { "epoch": 0.06167400881057269, "grad_norm": 1.183930720799068, "learning_rate": 1.2253521126760564e-05, "loss": 0.8590051531791687, "step": 175 }, { "epoch": 0.06202643171806167, "grad_norm": 1.1264180921527844, "learning_rate": 1.232394366197183e-05, "loss": 0.7106916904449463, "step": 176 }, { "epoch": 0.06237885462555066, "grad_norm": 1.5304901042334342, "learning_rate": 1.2394366197183098e-05, "loss": 0.9298936128616333, "step": 177 }, { "epoch": 0.06273127753303964, "grad_norm": 1.3380597134261425, "learning_rate": 1.2464788732394367e-05, "loss": 1.027758240699768, "step": 178 }, { "epoch": 0.06308370044052863, "grad_norm": 1.4071851827143296, "learning_rate": 1.2535211267605636e-05, "loss": 0.9576354026794434, "step": 179 }, { "epoch": 0.06343612334801763, "grad_norm": 1.476054189108656, "learning_rate": 1.2605633802816903e-05, "loss": 0.6881245374679565, "step": 180 }, { "epoch": 0.0637885462555066, "grad_norm": 1.736658600923819, "learning_rate": 1.2676056338028171e-05, "loss": 0.9629781246185303, "step": 181 }, { "epoch": 0.0641409691629956, "grad_norm": 1.3990061114909895, "learning_rate": 1.2746478873239439e-05, "loss": 0.849892258644104, "step": 182 }, { "epoch": 0.06449339207048459, "grad_norm": 1.2776276047787312, "learning_rate": 1.2816901408450705e-05, "loss": 0.9294229745864868, "step": 183 }, { "epoch": 0.06484581497797356, "grad_norm": 1.346185395248099, "learning_rate": 1.2887323943661974e-05, "loss": 0.9534600973129272, "step": 184 }, { "epoch": 0.06519823788546256, "grad_norm": 1.2547825941083024, "learning_rate": 1.2957746478873242e-05, "loss": 0.7937755584716797, "step": 185 }, { "epoch": 0.06555066079295155, "grad_norm": 1.215372024356157, "learning_rate": 1.3028169014084508e-05, "loss": 0.9188590049743652, "step": 186 }, { "epoch": 0.06590308370044053, "grad_norm": 1.3372931395210206, "learning_rate": 1.3098591549295776e-05, "loss": 0.8775123357772827, "step": 187 }, { "epoch": 0.06625550660792952, "grad_norm": 1.2703292803517752, "learning_rate": 1.3169014084507044e-05, "loss": 0.8562190532684326, "step": 188 }, { "epoch": 0.0666079295154185, "grad_norm": 1.1593142823065046, "learning_rate": 1.323943661971831e-05, "loss": 0.9427295327186584, "step": 189 }, { "epoch": 0.06696035242290749, "grad_norm": 1.1080518257913534, "learning_rate": 1.3309859154929579e-05, "loss": 0.6142286062240601, "step": 190 }, { "epoch": 0.06731277533039648, "grad_norm": 1.416041365414943, "learning_rate": 1.3380281690140845e-05, "loss": 0.7480863332748413, "step": 191 }, { "epoch": 0.06766519823788546, "grad_norm": 1.6287312517465182, "learning_rate": 1.3450704225352114e-05, "loss": 0.898857593536377, "step": 192 }, { "epoch": 0.06801762114537445, "grad_norm": 1.4737642135415263, "learning_rate": 1.3521126760563382e-05, "loss": 0.8584127426147461, "step": 193 }, { "epoch": 0.06837004405286344, "grad_norm": 1.2178631494207084, "learning_rate": 1.3591549295774648e-05, "loss": 0.9400655031204224, "step": 194 }, { "epoch": 0.06872246696035242, "grad_norm": 1.2698602238237462, "learning_rate": 1.3661971830985916e-05, "loss": 0.7750787734985352, "step": 195 }, { "epoch": 0.06907488986784141, "grad_norm": 1.2474557266398312, "learning_rate": 1.3732394366197184e-05, "loss": 0.8530284762382507, "step": 196 }, { "epoch": 0.0694273127753304, "grad_norm": 1.3191630227557989, "learning_rate": 1.380281690140845e-05, "loss": 0.9019994735717773, "step": 197 }, { "epoch": 0.06977973568281938, "grad_norm": 1.1994310415476668, "learning_rate": 1.3873239436619719e-05, "loss": 0.7749642133712769, "step": 198 }, { "epoch": 0.07013215859030837, "grad_norm": 1.3060142025317714, "learning_rate": 1.3943661971830987e-05, "loss": 0.956200122833252, "step": 199 }, { "epoch": 0.07048458149779736, "grad_norm": 1.3510407726181874, "learning_rate": 1.4014084507042253e-05, "loss": 0.8544470071792603, "step": 200 }, { "epoch": 0.07083700440528634, "grad_norm": 1.447521091304659, "learning_rate": 1.4084507042253522e-05, "loss": 0.8776387572288513, "step": 201 }, { "epoch": 0.07118942731277533, "grad_norm": 1.5340123254246993, "learning_rate": 1.4154929577464788e-05, "loss": 0.9949591755867004, "step": 202 }, { "epoch": 0.07154185022026431, "grad_norm": 1.306920931788941, "learning_rate": 1.4225352112676058e-05, "loss": 0.9616764783859253, "step": 203 }, { "epoch": 0.0718942731277533, "grad_norm": 1.3490978686730206, "learning_rate": 1.4295774647887326e-05, "loss": 0.9247175455093384, "step": 204 }, { "epoch": 0.0722466960352423, "grad_norm": 1.4241509312853966, "learning_rate": 1.4366197183098594e-05, "loss": 0.7946479320526123, "step": 205 }, { "epoch": 0.07259911894273127, "grad_norm": 1.3949991357763207, "learning_rate": 1.443661971830986e-05, "loss": 0.7929860353469849, "step": 206 }, { "epoch": 0.07295154185022026, "grad_norm": 1.3725430537583514, "learning_rate": 1.4507042253521129e-05, "loss": 0.9215391874313354, "step": 207 }, { "epoch": 0.07330396475770926, "grad_norm": 1.4247014676365253, "learning_rate": 1.4577464788732397e-05, "loss": 0.8767607808113098, "step": 208 }, { "epoch": 0.07365638766519823, "grad_norm": 1.3691339839746066, "learning_rate": 1.4647887323943663e-05, "loss": 0.8586276769638062, "step": 209 }, { "epoch": 0.07400881057268723, "grad_norm": 1.3252388254138234, "learning_rate": 1.4718309859154931e-05, "loss": 0.8680851459503174, "step": 210 }, { "epoch": 0.07436123348017622, "grad_norm": 1.2834178375463614, "learning_rate": 1.47887323943662e-05, "loss": 0.8887720108032227, "step": 211 }, { "epoch": 0.0747136563876652, "grad_norm": 1.4918681608584679, "learning_rate": 1.4859154929577466e-05, "loss": 0.8887100219726562, "step": 212 }, { "epoch": 0.07506607929515419, "grad_norm": 1.247870788657092, "learning_rate": 1.4929577464788734e-05, "loss": 0.9257807731628418, "step": 213 }, { "epoch": 0.07541850220264318, "grad_norm": 1.2922967878533598, "learning_rate": 1.5000000000000002e-05, "loss": 0.8107355833053589, "step": 214 }, { "epoch": 0.07577092511013216, "grad_norm": 1.390091314994072, "learning_rate": 1.5070422535211269e-05, "loss": 0.8765913844108582, "step": 215 }, { "epoch": 0.07612334801762115, "grad_norm": 1.3936279931065536, "learning_rate": 1.5140845070422537e-05, "loss": 0.8973524570465088, "step": 216 }, { "epoch": 0.07647577092511013, "grad_norm": 1.310665112588589, "learning_rate": 1.5211267605633803e-05, "loss": 0.9194613695144653, "step": 217 }, { "epoch": 0.07682819383259912, "grad_norm": 1.4152279415932816, "learning_rate": 1.528169014084507e-05, "loss": 0.8832643032073975, "step": 218 }, { "epoch": 0.07718061674008811, "grad_norm": 1.465705079678902, "learning_rate": 1.535211267605634e-05, "loss": 0.9575356245040894, "step": 219 }, { "epoch": 0.07753303964757709, "grad_norm": 1.2268114727867823, "learning_rate": 1.5422535211267607e-05, "loss": 0.8302342891693115, "step": 220 }, { "epoch": 0.07788546255506608, "grad_norm": 1.2978917843344704, "learning_rate": 1.5492957746478872e-05, "loss": 0.7999966144561768, "step": 221 }, { "epoch": 0.07823788546255507, "grad_norm": 1.271952593735668, "learning_rate": 1.556338028169014e-05, "loss": 0.8201859593391418, "step": 222 }, { "epoch": 0.07859030837004405, "grad_norm": 1.635464665304201, "learning_rate": 1.563380281690141e-05, "loss": 0.872761607170105, "step": 223 }, { "epoch": 0.07894273127753304, "grad_norm": 1.7544850567681591, "learning_rate": 1.5704225352112677e-05, "loss": 0.8695409297943115, "step": 224 }, { "epoch": 0.07929515418502203, "grad_norm": 1.2478131333285527, "learning_rate": 1.5774647887323945e-05, "loss": 0.8532050848007202, "step": 225 }, { "epoch": 0.07964757709251101, "grad_norm": 1.5276196879895285, "learning_rate": 1.5845070422535213e-05, "loss": 0.7875121235847473, "step": 226 }, { "epoch": 0.08, "grad_norm": 1.5837485275916963, "learning_rate": 1.591549295774648e-05, "loss": 0.7131509780883789, "step": 227 }, { "epoch": 0.080352422907489, "grad_norm": 1.4681482709870555, "learning_rate": 1.598591549295775e-05, "loss": 0.9758431911468506, "step": 228 }, { "epoch": 0.08070484581497797, "grad_norm": 1.4451165548552447, "learning_rate": 1.6056338028169017e-05, "loss": 0.7894232273101807, "step": 229 }, { "epoch": 0.08105726872246696, "grad_norm": 1.2417235745587356, "learning_rate": 1.6126760563380285e-05, "loss": 0.9933483600616455, "step": 230 }, { "epoch": 0.08140969162995594, "grad_norm": 1.4745298800972837, "learning_rate": 1.619718309859155e-05, "loss": 0.8424056768417358, "step": 231 }, { "epoch": 0.08176211453744493, "grad_norm": 1.4626597398090972, "learning_rate": 1.6267605633802818e-05, "loss": 0.7957695126533508, "step": 232 }, { "epoch": 0.08211453744493392, "grad_norm": 1.243843455131114, "learning_rate": 1.6338028169014086e-05, "loss": 0.8491722345352173, "step": 233 }, { "epoch": 0.0824669603524229, "grad_norm": 1.407640698868158, "learning_rate": 1.6408450704225354e-05, "loss": 0.7010964751243591, "step": 234 }, { "epoch": 0.0828193832599119, "grad_norm": 1.4584433632361322, "learning_rate": 1.6478873239436623e-05, "loss": 0.8713864088058472, "step": 235 }, { "epoch": 0.08317180616740089, "grad_norm": 1.261328425360657, "learning_rate": 1.6549295774647887e-05, "loss": 0.6724761128425598, "step": 236 }, { "epoch": 0.08352422907488986, "grad_norm": 1.219837126653021, "learning_rate": 1.6619718309859155e-05, "loss": 0.8612109422683716, "step": 237 }, { "epoch": 0.08387665198237886, "grad_norm": 1.4745868727167897, "learning_rate": 1.6690140845070424e-05, "loss": 0.5697110891342163, "step": 238 }, { "epoch": 0.08422907488986785, "grad_norm": 1.2506294676144012, "learning_rate": 1.676056338028169e-05, "loss": 0.7877228260040283, "step": 239 }, { "epoch": 0.08458149779735682, "grad_norm": 1.1492235860181979, "learning_rate": 1.683098591549296e-05, "loss": 0.8751014471054077, "step": 240 }, { "epoch": 0.08493392070484582, "grad_norm": 1.527957574033417, "learning_rate": 1.6901408450704228e-05, "loss": 0.8731381893157959, "step": 241 }, { "epoch": 0.08528634361233481, "grad_norm": 1.291362512763109, "learning_rate": 1.6971830985915493e-05, "loss": 0.831383228302002, "step": 242 }, { "epoch": 0.08563876651982379, "grad_norm": 1.2699070733171296, "learning_rate": 1.704225352112676e-05, "loss": 0.792934238910675, "step": 243 }, { "epoch": 0.08599118942731278, "grad_norm": 1.1592748972292606, "learning_rate": 1.711267605633803e-05, "loss": 0.6723657846450806, "step": 244 }, { "epoch": 0.08634361233480176, "grad_norm": 1.4796981905185658, "learning_rate": 1.7183098591549297e-05, "loss": 0.8377546072006226, "step": 245 }, { "epoch": 0.08669603524229075, "grad_norm": 1.2727987522874769, "learning_rate": 1.7253521126760565e-05, "loss": 0.8073972463607788, "step": 246 }, { "epoch": 0.08704845814977974, "grad_norm": 1.6240304260373406, "learning_rate": 1.732394366197183e-05, "loss": 0.8913615942001343, "step": 247 }, { "epoch": 0.08740088105726872, "grad_norm": 1.4436852067854697, "learning_rate": 1.7394366197183098e-05, "loss": 0.9133341312408447, "step": 248 }, { "epoch": 0.08775330396475771, "grad_norm": 1.6098073633875791, "learning_rate": 1.7464788732394366e-05, "loss": 0.7593938112258911, "step": 249 }, { "epoch": 0.0881057268722467, "grad_norm": 1.456505700957212, "learning_rate": 1.7535211267605638e-05, "loss": 0.8049266934394836, "step": 250 }, { "epoch": 0.08845814977973568, "grad_norm": 1.44397678174898, "learning_rate": 1.7605633802816902e-05, "loss": 0.9065679311752319, "step": 251 }, { "epoch": 0.08881057268722467, "grad_norm": 1.5285644429403964, "learning_rate": 1.767605633802817e-05, "loss": 0.9309085011482239, "step": 252 }, { "epoch": 0.08916299559471366, "grad_norm": 1.3367293223358285, "learning_rate": 1.774647887323944e-05, "loss": 0.7846949100494385, "step": 253 }, { "epoch": 0.08951541850220264, "grad_norm": 1.4721492627949804, "learning_rate": 1.7816901408450707e-05, "loss": 0.9153063297271729, "step": 254 }, { "epoch": 0.08986784140969163, "grad_norm": 1.2843813691966974, "learning_rate": 1.7887323943661975e-05, "loss": 0.7743638157844543, "step": 255 }, { "epoch": 0.09022026431718062, "grad_norm": 1.6034162783223496, "learning_rate": 1.7957746478873243e-05, "loss": 0.887751579284668, "step": 256 }, { "epoch": 0.0905726872246696, "grad_norm": 1.2387435479452011, "learning_rate": 1.8028169014084508e-05, "loss": 0.8072899580001831, "step": 257 }, { "epoch": 0.09092511013215859, "grad_norm": 1.3642448388425203, "learning_rate": 1.8098591549295776e-05, "loss": 0.8275943994522095, "step": 258 }, { "epoch": 0.09127753303964757, "grad_norm": 1.3287842865535133, "learning_rate": 1.8169014084507044e-05, "loss": 0.8300620913505554, "step": 259 }, { "epoch": 0.09162995594713656, "grad_norm": 1.26616505669333, "learning_rate": 1.8239436619718312e-05, "loss": 0.6886857748031616, "step": 260 }, { "epoch": 0.09198237885462555, "grad_norm": 2.689833624979495, "learning_rate": 1.830985915492958e-05, "loss": 0.8190158605575562, "step": 261 }, { "epoch": 0.09233480176211453, "grad_norm": 1.3392491700180422, "learning_rate": 1.8380281690140845e-05, "loss": 0.8500730991363525, "step": 262 }, { "epoch": 0.09268722466960352, "grad_norm": 1.499663410513064, "learning_rate": 1.8450704225352113e-05, "loss": 0.8340811729431152, "step": 263 }, { "epoch": 0.09303964757709252, "grad_norm": 1.3031308803407857, "learning_rate": 1.852112676056338e-05, "loss": 0.8055675029754639, "step": 264 }, { "epoch": 0.0933920704845815, "grad_norm": 1.410218243221954, "learning_rate": 1.859154929577465e-05, "loss": 0.7956680059432983, "step": 265 }, { "epoch": 0.09374449339207049, "grad_norm": 1.4181751660111779, "learning_rate": 1.8661971830985917e-05, "loss": 0.8232501745223999, "step": 266 }, { "epoch": 0.09409691629955948, "grad_norm": 1.472224530959967, "learning_rate": 1.8732394366197186e-05, "loss": 0.8808565139770508, "step": 267 }, { "epoch": 0.09444933920704845, "grad_norm": 1.5113548411958122, "learning_rate": 1.880281690140845e-05, "loss": 0.885380744934082, "step": 268 }, { "epoch": 0.09480176211453745, "grad_norm": 1.5009611452094687, "learning_rate": 1.887323943661972e-05, "loss": 0.8408790826797485, "step": 269 }, { "epoch": 0.09515418502202644, "grad_norm": 1.395810517840328, "learning_rate": 1.8943661971830987e-05, "loss": 0.7089993953704834, "step": 270 }, { "epoch": 0.09550660792951542, "grad_norm": 1.280231938177333, "learning_rate": 1.9014084507042255e-05, "loss": 0.7941038608551025, "step": 271 }, { "epoch": 0.09585903083700441, "grad_norm": 1.5210768015450882, "learning_rate": 1.9084507042253523e-05, "loss": 0.8269138932228088, "step": 272 }, { "epoch": 0.09621145374449339, "grad_norm": 1.5053903060638305, "learning_rate": 1.9154929577464788e-05, "loss": 0.8206192255020142, "step": 273 }, { "epoch": 0.09656387665198238, "grad_norm": 1.49737615599854, "learning_rate": 1.922535211267606e-05, "loss": 0.9146496653556824, "step": 274 }, { "epoch": 0.09691629955947137, "grad_norm": 1.1755726979972605, "learning_rate": 1.9295774647887327e-05, "loss": 0.6738560199737549, "step": 275 }, { "epoch": 0.09726872246696035, "grad_norm": 1.3169911381980228, "learning_rate": 1.9366197183098595e-05, "loss": 0.934916615486145, "step": 276 }, { "epoch": 0.09762114537444934, "grad_norm": 1.357245739203775, "learning_rate": 1.943661971830986e-05, "loss": 0.8952134847640991, "step": 277 }, { "epoch": 0.09797356828193833, "grad_norm": 1.3423178147772294, "learning_rate": 1.9507042253521128e-05, "loss": 0.9346420764923096, "step": 278 }, { "epoch": 0.09832599118942731, "grad_norm": 1.5698833191970427, "learning_rate": 1.9577464788732396e-05, "loss": 0.8781993985176086, "step": 279 }, { "epoch": 0.0986784140969163, "grad_norm": 1.4703395142125208, "learning_rate": 1.9647887323943664e-05, "loss": 0.8283448219299316, "step": 280 }, { "epoch": 0.09903083700440529, "grad_norm": 1.2650765439550704, "learning_rate": 1.9718309859154933e-05, "loss": 0.8010722398757935, "step": 281 }, { "epoch": 0.09938325991189427, "grad_norm": 1.3576050403922397, "learning_rate": 1.97887323943662e-05, "loss": 0.8697119951248169, "step": 282 }, { "epoch": 0.09973568281938326, "grad_norm": 1.098837792765385, "learning_rate": 1.9859154929577465e-05, "loss": 0.6448882818222046, "step": 283 }, { "epoch": 0.10008810572687225, "grad_norm": 1.5101908618325302, "learning_rate": 1.9929577464788734e-05, "loss": 0.7782007455825806, "step": 284 }, { "epoch": 0.10044052863436123, "grad_norm": 1.455658231417001, "learning_rate": 2e-05, "loss": 0.8131508827209473, "step": 285 }, { "epoch": 0.10079295154185022, "grad_norm": 1.4413777660177336, "learning_rate": 1.999999830265561e-05, "loss": 0.8592134714126587, "step": 286 }, { "epoch": 0.1011453744493392, "grad_norm": 1.5671417589518397, "learning_rate": 1.9999993210623002e-05, "loss": 0.9374675750732422, "step": 287 }, { "epoch": 0.10149779735682819, "grad_norm": 1.5499152824954487, "learning_rate": 1.9999984723903913e-05, "loss": 0.8416328430175781, "step": 288 }, { "epoch": 0.10185022026431718, "grad_norm": 1.267360297703748, "learning_rate": 1.9999972842501218e-05, "loss": 0.7587184906005859, "step": 289 }, { "epoch": 0.10220264317180616, "grad_norm": 1.4783535336356979, "learning_rate": 1.9999957566418956e-05, "loss": 1.010494351387024, "step": 290 }, { "epoch": 0.10255506607929515, "grad_norm": 1.3092025632301814, "learning_rate": 1.999993889566231e-05, "loss": 0.7942835092544556, "step": 291 }, { "epoch": 0.10290748898678415, "grad_norm": 1.4620379458028798, "learning_rate": 1.999991683023762e-05, "loss": 0.9069477915763855, "step": 292 }, { "epoch": 0.10325991189427312, "grad_norm": 1.781963673155629, "learning_rate": 1.9999891370152375e-05, "loss": 0.8776397705078125, "step": 293 }, { "epoch": 0.10361233480176212, "grad_norm": 1.3409879305652028, "learning_rate": 1.9999862515415216e-05, "loss": 0.8560416102409363, "step": 294 }, { "epoch": 0.10396475770925111, "grad_norm": 1.601676543787724, "learning_rate": 1.9999830266035942e-05, "loss": 0.9177321195602417, "step": 295 }, { "epoch": 0.10431718061674009, "grad_norm": 1.621521883940329, "learning_rate": 1.99997946220255e-05, "loss": 0.8830884695053101, "step": 296 }, { "epoch": 0.10466960352422908, "grad_norm": 1.5076951372471592, "learning_rate": 1.9999755583395987e-05, "loss": 0.913659930229187, "step": 297 }, { "epoch": 0.10502202643171807, "grad_norm": 1.48724181087663, "learning_rate": 1.999971315016066e-05, "loss": 0.773309588432312, "step": 298 }, { "epoch": 0.10537444933920705, "grad_norm": 1.4640758198016095, "learning_rate": 1.9999667322333916e-05, "loss": 0.8432563543319702, "step": 299 }, { "epoch": 0.10572687224669604, "grad_norm": 1.5419897004531282, "learning_rate": 1.999961809993132e-05, "loss": 0.9632397890090942, "step": 300 }, { "epoch": 0.10607929515418502, "grad_norm": 1.4657018761848883, "learning_rate": 1.999956548296958e-05, "loss": 0.8205600380897522, "step": 301 }, { "epoch": 0.10643171806167401, "grad_norm": 1.2908123355748096, "learning_rate": 1.9999509471466557e-05, "loss": 0.8789785504341125, "step": 302 }, { "epoch": 0.106784140969163, "grad_norm": 1.4062841050093677, "learning_rate": 1.999945006544126e-05, "loss": 0.8445791006088257, "step": 303 }, { "epoch": 0.10713656387665198, "grad_norm": 1.3201850616961108, "learning_rate": 1.9999387264913865e-05, "loss": 0.8025245666503906, "step": 304 }, { "epoch": 0.10748898678414097, "grad_norm": 1.3596018005437036, "learning_rate": 1.9999321069905688e-05, "loss": 0.9271318912506104, "step": 305 }, { "epoch": 0.10784140969162996, "grad_norm": 1.167387591378785, "learning_rate": 1.999925148043919e-05, "loss": 0.809894859790802, "step": 306 }, { "epoch": 0.10819383259911894, "grad_norm": 1.4267923203712158, "learning_rate": 1.999917849653801e-05, "loss": 0.8940669298171997, "step": 307 }, { "epoch": 0.10854625550660793, "grad_norm": 1.466148592973388, "learning_rate": 1.9999102118226912e-05, "loss": 0.9301233887672424, "step": 308 }, { "epoch": 0.10889867841409692, "grad_norm": 1.271175959298383, "learning_rate": 1.9999022345531834e-05, "loss": 0.6429216861724854, "step": 309 }, { "epoch": 0.1092511013215859, "grad_norm": 1.3392816449794738, "learning_rate": 1.999893917847985e-05, "loss": 0.7199009656906128, "step": 310 }, { "epoch": 0.10960352422907489, "grad_norm": 1.2732787140894477, "learning_rate": 1.999885261709919e-05, "loss": 0.8312395811080933, "step": 311 }, { "epoch": 0.10995594713656388, "grad_norm": 1.4809957988420102, "learning_rate": 1.999876266141924e-05, "loss": 0.8187745213508606, "step": 312 }, { "epoch": 0.11030837004405286, "grad_norm": 1.2638906346778362, "learning_rate": 1.9998669311470546e-05, "loss": 0.8632344603538513, "step": 313 }, { "epoch": 0.11066079295154185, "grad_norm": 1.5651718256034985, "learning_rate": 1.9998572567284787e-05, "loss": 0.8789447546005249, "step": 314 }, { "epoch": 0.11101321585903083, "grad_norm": 1.4657438576086577, "learning_rate": 1.999847242889481e-05, "loss": 0.7647864818572998, "step": 315 }, { "epoch": 0.11136563876651982, "grad_norm": 1.2962284510646964, "learning_rate": 1.9998368896334606e-05, "loss": 0.872633695602417, "step": 316 }, { "epoch": 0.11171806167400881, "grad_norm": 1.4704185501053861, "learning_rate": 1.9998261969639324e-05, "loss": 0.8249840140342712, "step": 317 }, { "epoch": 0.11207048458149779, "grad_norm": 1.6298830469717174, "learning_rate": 1.999815164884526e-05, "loss": 0.7558056116104126, "step": 318 }, { "epoch": 0.11242290748898678, "grad_norm": 1.3075257157183537, "learning_rate": 1.9998037933989866e-05, "loss": 0.7447441220283508, "step": 319 }, { "epoch": 0.11277533039647578, "grad_norm": 1.4956646267919036, "learning_rate": 1.9997920825111743e-05, "loss": 0.8260442018508911, "step": 320 }, { "epoch": 0.11312775330396475, "grad_norm": 1.2866274072297625, "learning_rate": 1.999780032225065e-05, "loss": 0.7916134595870972, "step": 321 }, { "epoch": 0.11348017621145375, "grad_norm": 1.3548711592442237, "learning_rate": 1.9997676425447486e-05, "loss": 0.7460259199142456, "step": 322 }, { "epoch": 0.11383259911894274, "grad_norm": 1.4664419676620792, "learning_rate": 1.9997549134744318e-05, "loss": 0.9739946126937866, "step": 323 }, { "epoch": 0.11418502202643172, "grad_norm": 1.3133090693965692, "learning_rate": 1.9997418450184352e-05, "loss": 0.7242900133132935, "step": 324 }, { "epoch": 0.1145374449339207, "grad_norm": 1.7023646414032152, "learning_rate": 1.9997284371811955e-05, "loss": 0.7645323276519775, "step": 325 }, { "epoch": 0.1148898678414097, "grad_norm": 1.3437215758424148, "learning_rate": 1.9997146899672638e-05, "loss": 0.7377017736434937, "step": 326 }, { "epoch": 0.11524229074889868, "grad_norm": 1.3608732999796416, "learning_rate": 1.9997006033813076e-05, "loss": 0.7117934226989746, "step": 327 }, { "epoch": 0.11559471365638767, "grad_norm": 1.485158034808982, "learning_rate": 1.999686177428108e-05, "loss": 0.8517680168151855, "step": 328 }, { "epoch": 0.11594713656387665, "grad_norm": 1.3118416735480631, "learning_rate": 1.9996714121125626e-05, "loss": 0.7099400758743286, "step": 329 }, { "epoch": 0.11629955947136564, "grad_norm": 1.3949559553781739, "learning_rate": 1.9996563074396838e-05, "loss": 0.8581711053848267, "step": 330 }, { "epoch": 0.11665198237885463, "grad_norm": 1.322464822656225, "learning_rate": 1.9996408634145994e-05, "loss": 0.7841953635215759, "step": 331 }, { "epoch": 0.11700440528634361, "grad_norm": 1.2580468593989962, "learning_rate": 1.9996250800425515e-05, "loss": 0.7376754879951477, "step": 332 }, { "epoch": 0.1173568281938326, "grad_norm": 1.3538742269891202, "learning_rate": 1.9996089573288985e-05, "loss": 0.8934558033943176, "step": 333 }, { "epoch": 0.11770925110132159, "grad_norm": 1.4597310886631008, "learning_rate": 1.999592495279113e-05, "loss": 0.7870250940322876, "step": 334 }, { "epoch": 0.11806167400881057, "grad_norm": 1.5788273084375275, "learning_rate": 1.9995756938987846e-05, "loss": 0.7026203274726868, "step": 335 }, { "epoch": 0.11841409691629956, "grad_norm": 2.206437289778364, "learning_rate": 1.999558553193616e-05, "loss": 1.0066381692886353, "step": 336 }, { "epoch": 0.11876651982378855, "grad_norm": 1.349262918557434, "learning_rate": 1.9995410731694255e-05, "loss": 0.7860246896743774, "step": 337 }, { "epoch": 0.11911894273127753, "grad_norm": 1.4261295710834618, "learning_rate": 1.999523253832148e-05, "loss": 0.8142588138580322, "step": 338 }, { "epoch": 0.11947136563876652, "grad_norm": 1.403543131076251, "learning_rate": 1.9995050951878317e-05, "loss": 0.9737639427185059, "step": 339 }, { "epoch": 0.1198237885462555, "grad_norm": 1.2538473699838193, "learning_rate": 1.999486597242642e-05, "loss": 0.6165765523910522, "step": 340 }, { "epoch": 0.12017621145374449, "grad_norm": 1.4403971646421685, "learning_rate": 1.999467760002857e-05, "loss": 0.8553996086120605, "step": 341 }, { "epoch": 0.12052863436123348, "grad_norm": 1.579218034733104, "learning_rate": 1.9994485834748725e-05, "loss": 0.9291022419929504, "step": 342 }, { "epoch": 0.12088105726872246, "grad_norm": 1.3583147087232978, "learning_rate": 1.9994290676651977e-05, "loss": 0.8309136629104614, "step": 343 }, { "epoch": 0.12123348017621145, "grad_norm": 1.2343518052190974, "learning_rate": 1.999409212580458e-05, "loss": 0.6963932514190674, "step": 344 }, { "epoch": 0.12158590308370044, "grad_norm": 1.126432291251887, "learning_rate": 1.9993890182273932e-05, "loss": 0.8220632076263428, "step": 345 }, { "epoch": 0.12193832599118942, "grad_norm": 1.5283410369228738, "learning_rate": 1.9993684846128588e-05, "loss": 0.8407794237136841, "step": 346 }, { "epoch": 0.12229074889867841, "grad_norm": 1.479739244816861, "learning_rate": 1.9993476117438257e-05, "loss": 0.795718789100647, "step": 347 }, { "epoch": 0.1226431718061674, "grad_norm": 1.3466106447402244, "learning_rate": 1.9993263996273792e-05, "loss": 0.7482223510742188, "step": 348 }, { "epoch": 0.12299559471365638, "grad_norm": 1.4606743428798505, "learning_rate": 1.99930484827072e-05, "loss": 0.814468264579773, "step": 349 }, { "epoch": 0.12334801762114538, "grad_norm": 1.5345713664893856, "learning_rate": 1.9992829576811648e-05, "loss": 0.8105748891830444, "step": 350 }, { "epoch": 0.12370044052863437, "grad_norm": 1.6869192314100032, "learning_rate": 1.9992607278661437e-05, "loss": 0.8756073713302612, "step": 351 }, { "epoch": 0.12405286343612335, "grad_norm": 1.228330868948225, "learning_rate": 1.9992381588332043e-05, "loss": 0.8643946647644043, "step": 352 }, { "epoch": 0.12440528634361234, "grad_norm": 1.1468400313164093, "learning_rate": 1.9992152505900067e-05, "loss": 0.7691172361373901, "step": 353 }, { "epoch": 0.12475770925110131, "grad_norm": 1.3198644948783926, "learning_rate": 1.9991920031443288e-05, "loss": 0.716686487197876, "step": 354 }, { "epoch": 0.12511013215859032, "grad_norm": 1.39334404424432, "learning_rate": 1.9991684165040616e-05, "loss": 0.697482705116272, "step": 355 }, { "epoch": 0.12546255506607928, "grad_norm": 1.5087579956634654, "learning_rate": 1.999144490677212e-05, "loss": 0.8039460182189941, "step": 356 }, { "epoch": 0.12581497797356828, "grad_norm": 1.3206582875495743, "learning_rate": 1.9991202256719032e-05, "loss": 0.872138261795044, "step": 357 }, { "epoch": 0.12616740088105727, "grad_norm": 1.330801420963485, "learning_rate": 1.999095621496371e-05, "loss": 0.8659502267837524, "step": 358 }, { "epoch": 0.12651982378854626, "grad_norm": 1.2062023445068855, "learning_rate": 1.9990706781589682e-05, "loss": 0.7585660219192505, "step": 359 }, { "epoch": 0.12687224669603525, "grad_norm": 1.349814688916852, "learning_rate": 1.9990453956681626e-05, "loss": 0.86381995677948, "step": 360 }, { "epoch": 0.12722466960352422, "grad_norm": 1.3080210647965176, "learning_rate": 1.9990197740325365e-05, "loss": 0.7623461484909058, "step": 361 }, { "epoch": 0.1275770925110132, "grad_norm": 1.4247026163468757, "learning_rate": 1.9989938132607877e-05, "loss": 0.8262917995452881, "step": 362 }, { "epoch": 0.1279295154185022, "grad_norm": 1.3245955099655373, "learning_rate": 1.9989675133617294e-05, "loss": 0.7879630327224731, "step": 363 }, { "epoch": 0.1282819383259912, "grad_norm": 1.5925116832241206, "learning_rate": 1.9989408743442892e-05, "loss": 0.8282565474510193, "step": 364 }, { "epoch": 0.12863436123348018, "grad_norm": 1.151308483630064, "learning_rate": 1.9989138962175105e-05, "loss": 0.8358104228973389, "step": 365 }, { "epoch": 0.12898678414096917, "grad_norm": 1.4831450607430074, "learning_rate": 1.9988865789905513e-05, "loss": 0.9111027121543884, "step": 366 }, { "epoch": 0.12933920704845814, "grad_norm": 1.4181532995073547, "learning_rate": 1.9988589226726847e-05, "loss": 0.766915500164032, "step": 367 }, { "epoch": 0.12969162995594713, "grad_norm": 1.3923253104774793, "learning_rate": 1.9988309272733e-05, "loss": 0.818048357963562, "step": 368 }, { "epoch": 0.13004405286343612, "grad_norm": 1.2625645815303237, "learning_rate": 1.9988025928019e-05, "loss": 0.8188307285308838, "step": 369 }, { "epoch": 0.1303964757709251, "grad_norm": 1.4656557007271924, "learning_rate": 1.998773919268104e-05, "loss": 0.88718181848526, "step": 370 }, { "epoch": 0.1307488986784141, "grad_norm": 1.3104922660776017, "learning_rate": 1.998744906681645e-05, "loss": 0.9173898696899414, "step": 371 }, { "epoch": 0.1311013215859031, "grad_norm": 1.4305544884130297, "learning_rate": 1.9987155550523725e-05, "loss": 0.8025110960006714, "step": 372 }, { "epoch": 0.13145374449339206, "grad_norm": 1.2328392002659898, "learning_rate": 1.9986858643902502e-05, "loss": 0.8931341767311096, "step": 373 }, { "epoch": 0.13180616740088105, "grad_norm": 1.258415234092876, "learning_rate": 1.9986558347053574e-05, "loss": 0.8813796043395996, "step": 374 }, { "epoch": 0.13215859030837004, "grad_norm": 1.3254702068923054, "learning_rate": 1.9986254660078877e-05, "loss": 0.8021976947784424, "step": 375 }, { "epoch": 0.13251101321585904, "grad_norm": 1.3001638136254743, "learning_rate": 1.9985947583081506e-05, "loss": 0.8083860874176025, "step": 376 }, { "epoch": 0.13286343612334803, "grad_norm": 1.2519881014381842, "learning_rate": 1.9985637116165705e-05, "loss": 0.7639983296394348, "step": 377 }, { "epoch": 0.133215859030837, "grad_norm": 1.3308962501940544, "learning_rate": 1.9985323259436874e-05, "loss": 0.7775800228118896, "step": 378 }, { "epoch": 0.13356828193832598, "grad_norm": 1.3822704707659155, "learning_rate": 1.9985006013001545e-05, "loss": 0.8892228603363037, "step": 379 }, { "epoch": 0.13392070484581498, "grad_norm": 1.4007373611969895, "learning_rate": 1.998468537696742e-05, "loss": 0.9158765077590942, "step": 380 }, { "epoch": 0.13427312775330397, "grad_norm": 1.2142103786325267, "learning_rate": 1.9984361351443343e-05, "loss": 0.7523722648620605, "step": 381 }, { "epoch": 0.13462555066079296, "grad_norm": 1.5406874167870075, "learning_rate": 1.998403393653932e-05, "loss": 0.8052740693092346, "step": 382 }, { "epoch": 0.13497797356828195, "grad_norm": 4.839014305582762, "learning_rate": 1.9983703132366484e-05, "loss": 0.8271476626396179, "step": 383 }, { "epoch": 0.13533039647577091, "grad_norm": 1.3724243356768093, "learning_rate": 1.998336893903714e-05, "loss": 0.8904454112052917, "step": 384 }, { "epoch": 0.1356828193832599, "grad_norm": 1.5086695454887955, "learning_rate": 1.9983031356664733e-05, "loss": 0.8705847263336182, "step": 385 }, { "epoch": 0.1360352422907489, "grad_norm": 1.3562221939291232, "learning_rate": 1.9982690385363867e-05, "loss": 0.8269569873809814, "step": 386 }, { "epoch": 0.1363876651982379, "grad_norm": 1.6156870918588995, "learning_rate": 1.998234602525029e-05, "loss": 0.9796818494796753, "step": 387 }, { "epoch": 0.13674008810572688, "grad_norm": 1.5268638185003427, "learning_rate": 1.9981998276440892e-05, "loss": 0.8276596665382385, "step": 388 }, { "epoch": 0.13709251101321585, "grad_norm": 1.1979978409172833, "learning_rate": 1.9981647139053737e-05, "loss": 0.8739231824874878, "step": 389 }, { "epoch": 0.13744493392070484, "grad_norm": 1.517970302113154, "learning_rate": 1.9981292613208018e-05, "loss": 0.677521824836731, "step": 390 }, { "epoch": 0.13779735682819383, "grad_norm": 1.483399153515808, "learning_rate": 1.9980934699024084e-05, "loss": 0.744938313961029, "step": 391 }, { "epoch": 0.13814977973568282, "grad_norm": 1.423178346498717, "learning_rate": 1.998057339662344e-05, "loss": 0.8367065787315369, "step": 392 }, { "epoch": 0.1385022026431718, "grad_norm": 1.6714277386990386, "learning_rate": 1.9980208706128733e-05, "loss": 0.775547981262207, "step": 393 }, { "epoch": 0.1388546255506608, "grad_norm": 1.244274379470138, "learning_rate": 1.9979840627663764e-05, "loss": 0.8287982940673828, "step": 394 }, { "epoch": 0.13920704845814977, "grad_norm": 1.429588244120958, "learning_rate": 1.997946916135349e-05, "loss": 0.7582247257232666, "step": 395 }, { "epoch": 0.13955947136563876, "grad_norm": 1.309709423857836, "learning_rate": 1.997909430732401e-05, "loss": 0.968267560005188, "step": 396 }, { "epoch": 0.13991189427312775, "grad_norm": 1.4247483192434738, "learning_rate": 1.9978716065702566e-05, "loss": 0.8850257396697998, "step": 397 }, { "epoch": 0.14026431718061674, "grad_norm": 1.1261344584223945, "learning_rate": 1.9978334436617574e-05, "loss": 0.7206246852874756, "step": 398 }, { "epoch": 0.14061674008810573, "grad_norm": 1.2702546976441136, "learning_rate": 1.9977949420198576e-05, "loss": 0.7833065986633301, "step": 399 }, { "epoch": 0.14096916299559473, "grad_norm": 1.2940706461552187, "learning_rate": 1.9977561016576275e-05, "loss": 0.7199673652648926, "step": 400 }, { "epoch": 0.1413215859030837, "grad_norm": 1.3300807823897647, "learning_rate": 1.9977169225882522e-05, "loss": 0.7544811367988586, "step": 401 }, { "epoch": 0.14167400881057268, "grad_norm": 1.3500860064281444, "learning_rate": 1.9976774048250317e-05, "loss": 0.7528219819068909, "step": 402 }, { "epoch": 0.14202643171806167, "grad_norm": 1.230028309495833, "learning_rate": 1.9976375483813814e-05, "loss": 0.8025565147399902, "step": 403 }, { "epoch": 0.14237885462555067, "grad_norm": 1.271700071603726, "learning_rate": 1.997597353270831e-05, "loss": 0.6553962230682373, "step": 404 }, { "epoch": 0.14273127753303966, "grad_norm": 1.195900427449374, "learning_rate": 1.9975568195070253e-05, "loss": 0.7070015072822571, "step": 405 }, { "epoch": 0.14308370044052862, "grad_norm": 1.238996854756085, "learning_rate": 1.9975159471037247e-05, "loss": 0.7454725503921509, "step": 406 }, { "epoch": 0.1434361233480176, "grad_norm": 1.5517260528670263, "learning_rate": 1.9974747360748038e-05, "loss": 0.7074518799781799, "step": 407 }, { "epoch": 0.1437885462555066, "grad_norm": 1.4240478656973132, "learning_rate": 1.9974331864342527e-05, "loss": 0.6870182752609253, "step": 408 }, { "epoch": 0.1441409691629956, "grad_norm": 1.5514938206230895, "learning_rate": 1.9973912981961763e-05, "loss": 0.826898455619812, "step": 409 }, { "epoch": 0.1444933920704846, "grad_norm": 1.483679538302774, "learning_rate": 1.997349071374794e-05, "loss": 0.7244436740875244, "step": 410 }, { "epoch": 0.14484581497797358, "grad_norm": 1.2681717185328807, "learning_rate": 1.9973065059844404e-05, "loss": 0.6885448694229126, "step": 411 }, { "epoch": 0.14519823788546254, "grad_norm": 1.3797417122455713, "learning_rate": 1.9972636020395653e-05, "loss": 0.8477644920349121, "step": 412 }, { "epoch": 0.14555066079295154, "grad_norm": 1.5051840849568912, "learning_rate": 1.9972203595547334e-05, "loss": 0.9432111382484436, "step": 413 }, { "epoch": 0.14590308370044053, "grad_norm": 1.351618505603555, "learning_rate": 1.9971767785446243e-05, "loss": 1.0101501941680908, "step": 414 }, { "epoch": 0.14625550660792952, "grad_norm": 1.421926997117087, "learning_rate": 1.997132859024032e-05, "loss": 0.8174984455108643, "step": 415 }, { "epoch": 0.1466079295154185, "grad_norm": 1.1573592385577054, "learning_rate": 1.997088601007866e-05, "loss": 0.6857198476791382, "step": 416 }, { "epoch": 0.14696035242290748, "grad_norm": 1.1795540078822444, "learning_rate": 1.9970440045111505e-05, "loss": 0.7742792367935181, "step": 417 }, { "epoch": 0.14731277533039647, "grad_norm": 1.783143700583216, "learning_rate": 1.996999069549025e-05, "loss": 0.7489269971847534, "step": 418 }, { "epoch": 0.14766519823788546, "grad_norm": 1.4327273961807123, "learning_rate": 1.9969537961367423e-05, "loss": 0.7362021207809448, "step": 419 }, { "epoch": 0.14801762114537445, "grad_norm": 1.3763810595433905, "learning_rate": 1.996908184289673e-05, "loss": 0.7596213221549988, "step": 420 }, { "epoch": 0.14837004405286344, "grad_norm": 1.3357573192960268, "learning_rate": 1.9968622340232993e-05, "loss": 0.7739163637161255, "step": 421 }, { "epoch": 0.14872246696035243, "grad_norm": 1.2890109075687697, "learning_rate": 1.9968159453532215e-05, "loss": 0.9059790372848511, "step": 422 }, { "epoch": 0.1490748898678414, "grad_norm": 1.4830814966077062, "learning_rate": 1.9967693182951516e-05, "loss": 0.7298871278762817, "step": 423 }, { "epoch": 0.1494273127753304, "grad_norm": 1.3303231094936145, "learning_rate": 1.9967223528649194e-05, "loss": 0.7218194007873535, "step": 424 }, { "epoch": 0.14977973568281938, "grad_norm": 1.3738677080017252, "learning_rate": 1.996675049078467e-05, "loss": 0.8031259179115295, "step": 425 }, { "epoch": 0.15013215859030837, "grad_norm": 1.402915539690338, "learning_rate": 1.9966274069518533e-05, "loss": 0.8583194613456726, "step": 426 }, { "epoch": 0.15048458149779737, "grad_norm": 1.5081794718854693, "learning_rate": 1.9965794265012514e-05, "loss": 0.7829155921936035, "step": 427 }, { "epoch": 0.15083700440528636, "grad_norm": 1.3040065928659967, "learning_rate": 1.9965311077429484e-05, "loss": 0.709203839302063, "step": 428 }, { "epoch": 0.15118942731277532, "grad_norm": 1.324153309243564, "learning_rate": 1.996482450693348e-05, "loss": 0.7515710592269897, "step": 429 }, { "epoch": 0.1515418502202643, "grad_norm": 1.5966034920450463, "learning_rate": 1.9964334553689674e-05, "loss": 0.8552615642547607, "step": 430 }, { "epoch": 0.1518942731277533, "grad_norm": 1.3833039246024212, "learning_rate": 1.9963841217864385e-05, "loss": 0.7946224808692932, "step": 431 }, { "epoch": 0.1522466960352423, "grad_norm": 1.351342046961, "learning_rate": 1.9963344499625087e-05, "loss": 0.7117756605148315, "step": 432 }, { "epoch": 0.1525991189427313, "grad_norm": 1.5677032677150589, "learning_rate": 1.9962844399140405e-05, "loss": 0.8892849683761597, "step": 433 }, { "epoch": 0.15295154185022025, "grad_norm": 1.6682742006947457, "learning_rate": 1.9962340916580105e-05, "loss": 0.9037783145904541, "step": 434 }, { "epoch": 0.15330396475770924, "grad_norm": 1.3178590359087465, "learning_rate": 1.9961834052115104e-05, "loss": 0.7419179677963257, "step": 435 }, { "epoch": 0.15365638766519824, "grad_norm": 1.500659178246394, "learning_rate": 1.9961323805917464e-05, "loss": 0.847285270690918, "step": 436 }, { "epoch": 0.15400881057268723, "grad_norm": 1.520891708486689, "learning_rate": 1.99608101781604e-05, "loss": 0.793263852596283, "step": 437 }, { "epoch": 0.15436123348017622, "grad_norm": 1.2927327484478677, "learning_rate": 1.9960293169018276e-05, "loss": 0.6600923538208008, "step": 438 }, { "epoch": 0.1547136563876652, "grad_norm": 1.178823428760428, "learning_rate": 1.9959772778666592e-05, "loss": 0.7642164826393127, "step": 439 }, { "epoch": 0.15506607929515417, "grad_norm": 1.4230767051116806, "learning_rate": 1.995924900728201e-05, "loss": 0.897221565246582, "step": 440 }, { "epoch": 0.15541850220264317, "grad_norm": 1.3912415328195475, "learning_rate": 1.9958721855042338e-05, "loss": 0.830953061580658, "step": 441 }, { "epoch": 0.15577092511013216, "grad_norm": 1.3683790024985447, "learning_rate": 1.995819132212652e-05, "loss": 0.7514863014221191, "step": 442 }, { "epoch": 0.15612334801762115, "grad_norm": 1.3179910502987273, "learning_rate": 1.995765740871466e-05, "loss": 0.7039257287979126, "step": 443 }, { "epoch": 0.15647577092511014, "grad_norm": 1.5017230130600239, "learning_rate": 1.9957120114988e-05, "loss": 0.810503363609314, "step": 444 }, { "epoch": 0.1568281938325991, "grad_norm": 1.4050071397488821, "learning_rate": 1.9956579441128942e-05, "loss": 0.616968035697937, "step": 445 }, { "epoch": 0.1571806167400881, "grad_norm": 1.3149075420166694, "learning_rate": 1.9956035387321024e-05, "loss": 0.7008740901947021, "step": 446 }, { "epoch": 0.1575330396475771, "grad_norm": 1.4992101173925434, "learning_rate": 1.995548795374893e-05, "loss": 0.847025454044342, "step": 447 }, { "epoch": 0.15788546255506608, "grad_norm": 1.3763555067673139, "learning_rate": 1.9954937140598506e-05, "loss": 0.7788053750991821, "step": 448 }, { "epoch": 0.15823788546255507, "grad_norm": 1.301728118921247, "learning_rate": 1.9954382948056735e-05, "loss": 0.7592896819114685, "step": 449 }, { "epoch": 0.15859030837004406, "grad_norm": 1.6001158206313053, "learning_rate": 1.995382537631174e-05, "loss": 0.9458491802215576, "step": 450 }, { "epoch": 0.15894273127753303, "grad_norm": 1.3218132869761372, "learning_rate": 1.9953264425552804e-05, "loss": 0.8069632053375244, "step": 451 }, { "epoch": 0.15929515418502202, "grad_norm": 1.316918406992957, "learning_rate": 1.9952700095970357e-05, "loss": 0.7876379489898682, "step": 452 }, { "epoch": 0.159647577092511, "grad_norm": 1.5440089355741875, "learning_rate": 1.9952132387755965e-05, "loss": 0.796333909034729, "step": 453 }, { "epoch": 0.16, "grad_norm": 1.243828269503452, "learning_rate": 1.9951561301102348e-05, "loss": 0.7171634435653687, "step": 454 }, { "epoch": 0.160352422907489, "grad_norm": 1.429835470120866, "learning_rate": 1.9950986836203374e-05, "loss": 0.8312792778015137, "step": 455 }, { "epoch": 0.160704845814978, "grad_norm": 1.4333167021702193, "learning_rate": 1.995040899325406e-05, "loss": 0.7496857643127441, "step": 456 }, { "epoch": 0.16105726872246695, "grad_norm": 1.2513531381670333, "learning_rate": 1.9949827772450555e-05, "loss": 0.89504075050354, "step": 457 }, { "epoch": 0.16140969162995594, "grad_norm": 1.5536951579594835, "learning_rate": 1.9949243173990172e-05, "loss": 0.7580761313438416, "step": 458 }, { "epoch": 0.16176211453744493, "grad_norm": 1.6782383396512721, "learning_rate": 1.9948655198071365e-05, "loss": 0.7826676368713379, "step": 459 }, { "epoch": 0.16211453744493393, "grad_norm": 1.5979456835427475, "learning_rate": 1.9948063844893733e-05, "loss": 0.7591372728347778, "step": 460 }, { "epoch": 0.16246696035242292, "grad_norm": 1.394749193132719, "learning_rate": 1.994746911465802e-05, "loss": 0.7366905808448792, "step": 461 }, { "epoch": 0.16281938325991188, "grad_norm": 1.2449236570155473, "learning_rate": 1.9946871007566116e-05, "loss": 0.7152266502380371, "step": 462 }, { "epoch": 0.16317180616740087, "grad_norm": 1.475247855733958, "learning_rate": 1.994626952382107e-05, "loss": 0.8411930799484253, "step": 463 }, { "epoch": 0.16352422907488987, "grad_norm": 1.1709525471997975, "learning_rate": 1.9945664663627054e-05, "loss": 0.6689857244491577, "step": 464 }, { "epoch": 0.16387665198237886, "grad_norm": 1.3007920668059838, "learning_rate": 1.9945056427189408e-05, "loss": 0.6474499106407166, "step": 465 }, { "epoch": 0.16422907488986785, "grad_norm": 1.397646475804827, "learning_rate": 1.9944444814714604e-05, "loss": 0.7861372232437134, "step": 466 }, { "epoch": 0.16458149779735684, "grad_norm": 1.4072541980161448, "learning_rate": 1.9943829826410273e-05, "loss": 0.8301665186882019, "step": 467 }, { "epoch": 0.1649339207048458, "grad_norm": 1.1473159016242473, "learning_rate": 1.9943211462485176e-05, "loss": 0.661811888217926, "step": 468 }, { "epoch": 0.1652863436123348, "grad_norm": 1.4009911983471504, "learning_rate": 1.9942589723149233e-05, "loss": 0.7768537402153015, "step": 469 }, { "epoch": 0.1656387665198238, "grad_norm": 1.209922489625636, "learning_rate": 1.9941964608613503e-05, "loss": 0.6139112710952759, "step": 470 }, { "epoch": 0.16599118942731278, "grad_norm": 1.3814257371396368, "learning_rate": 1.9941336119090193e-05, "loss": 0.8284693956375122, "step": 471 }, { "epoch": 0.16634361233480177, "grad_norm": 1.2594577624707568, "learning_rate": 1.9940704254792655e-05, "loss": 0.7281739711761475, "step": 472 }, { "epoch": 0.16669603524229074, "grad_norm": 1.4773463672265492, "learning_rate": 1.994006901593539e-05, "loss": 0.687767744064331, "step": 473 }, { "epoch": 0.16704845814977973, "grad_norm": 1.3067539084660165, "learning_rate": 1.9939430402734046e-05, "loss": 0.7553595304489136, "step": 474 }, { "epoch": 0.16740088105726872, "grad_norm": 1.5537103296420662, "learning_rate": 1.99387884154054e-05, "loss": 0.9263294339179993, "step": 475 }, { "epoch": 0.1677533039647577, "grad_norm": 1.5514792381885942, "learning_rate": 1.9938143054167397e-05, "loss": 0.7014337182044983, "step": 476 }, { "epoch": 0.1681057268722467, "grad_norm": 1.1598559513797833, "learning_rate": 1.9937494319239112e-05, "loss": 0.6454538106918335, "step": 477 }, { "epoch": 0.1684581497797357, "grad_norm": 1.3402764899565285, "learning_rate": 1.9936842210840775e-05, "loss": 0.7792352437973022, "step": 478 }, { "epoch": 0.16881057268722466, "grad_norm": 1.481603380133959, "learning_rate": 1.9936186729193753e-05, "loss": 0.8773127794265747, "step": 479 }, { "epoch": 0.16916299559471365, "grad_norm": 1.3472965431143242, "learning_rate": 1.993552787452056e-05, "loss": 0.892439603805542, "step": 480 }, { "epoch": 0.16951541850220264, "grad_norm": 1.5839752051025837, "learning_rate": 1.993486564704486e-05, "loss": 0.89835524559021, "step": 481 }, { "epoch": 0.16986784140969163, "grad_norm": 1.4593777249036533, "learning_rate": 1.9934200046991453e-05, "loss": 0.8013701438903809, "step": 482 }, { "epoch": 0.17022026431718063, "grad_norm": 1.5168797838116639, "learning_rate": 1.9933531074586296e-05, "loss": 0.8086763620376587, "step": 483 }, { "epoch": 0.17057268722466962, "grad_norm": 1.4399310447978144, "learning_rate": 1.9932858730056486e-05, "loss": 0.7736518383026123, "step": 484 }, { "epoch": 0.17092511013215858, "grad_norm": 1.2982542574143365, "learning_rate": 1.9932183013630257e-05, "loss": 0.6247539520263672, "step": 485 }, { "epoch": 0.17127753303964757, "grad_norm": 1.519445958865324, "learning_rate": 1.9931503925536996e-05, "loss": 0.7172006368637085, "step": 486 }, { "epoch": 0.17162995594713656, "grad_norm": 1.3043787656359138, "learning_rate": 1.993082146600723e-05, "loss": 0.7854465246200562, "step": 487 }, { "epoch": 0.17198237885462556, "grad_norm": 1.2038371426907561, "learning_rate": 1.9930135635272637e-05, "loss": 0.7018419504165649, "step": 488 }, { "epoch": 0.17233480176211455, "grad_norm": 1.2578522146284077, "learning_rate": 1.9929446433566033e-05, "loss": 0.783660352230072, "step": 489 }, { "epoch": 0.1726872246696035, "grad_norm": 1.4288043068768257, "learning_rate": 1.992875386112138e-05, "loss": 1.0166207551956177, "step": 490 }, { "epoch": 0.1730396475770925, "grad_norm": 1.5208280960226344, "learning_rate": 1.9928057918173786e-05, "loss": 0.7692895531654358, "step": 491 }, { "epoch": 0.1733920704845815, "grad_norm": 1.3733404774184526, "learning_rate": 1.9927358604959503e-05, "loss": 0.8005259037017822, "step": 492 }, { "epoch": 0.1737444933920705, "grad_norm": 1.3189354109245792, "learning_rate": 1.9926655921715924e-05, "loss": 0.6780292987823486, "step": 493 }, { "epoch": 0.17409691629955948, "grad_norm": 1.2272422506889333, "learning_rate": 1.9925949868681587e-05, "loss": 0.6501175165176392, "step": 494 }, { "epoch": 0.17444933920704847, "grad_norm": 1.3095934443108421, "learning_rate": 1.9925240446096176e-05, "loss": 0.781839907169342, "step": 495 }, { "epoch": 0.17480176211453743, "grad_norm": 1.4508599784840917, "learning_rate": 1.992452765420052e-05, "loss": 0.7617994546890259, "step": 496 }, { "epoch": 0.17515418502202643, "grad_norm": 1.2324738440312524, "learning_rate": 1.992381149323659e-05, "loss": 0.8019097447395325, "step": 497 }, { "epoch": 0.17550660792951542, "grad_norm": 1.3071824216187324, "learning_rate": 1.9923091963447496e-05, "loss": 0.7526847124099731, "step": 498 }, { "epoch": 0.1758590308370044, "grad_norm": 1.340463358272731, "learning_rate": 1.9922369065077497e-05, "loss": 0.7101150751113892, "step": 499 }, { "epoch": 0.1762114537444934, "grad_norm": 1.396850141714641, "learning_rate": 1.9921642798372e-05, "loss": 0.8519806861877441, "step": 500 }, { "epoch": 0.17656387665198237, "grad_norm": 1.5427241760761283, "learning_rate": 1.9920913163577542e-05, "loss": 0.774759829044342, "step": 501 }, { "epoch": 0.17691629955947136, "grad_norm": 1.4501760642130928, "learning_rate": 1.992018016094182e-05, "loss": 0.8597595691680908, "step": 502 }, { "epoch": 0.17726872246696035, "grad_norm": 1.6336800938277667, "learning_rate": 1.9919443790713658e-05, "loss": 0.7023826241493225, "step": 503 }, { "epoch": 0.17762114537444934, "grad_norm": 1.8758125980343456, "learning_rate": 1.991870405314303e-05, "loss": 0.8290892839431763, "step": 504 }, { "epoch": 0.17797356828193833, "grad_norm": 1.368620384992611, "learning_rate": 1.9917960948481062e-05, "loss": 0.9240517020225525, "step": 505 }, { "epoch": 0.17832599118942732, "grad_norm": 1.4203507781601712, "learning_rate": 1.9917214476980012e-05, "loss": 0.8247153759002686, "step": 506 }, { "epoch": 0.1786784140969163, "grad_norm": 1.5364946844029868, "learning_rate": 1.991646463889328e-05, "loss": 0.9101368188858032, "step": 507 }, { "epoch": 0.17903083700440528, "grad_norm": 1.3883082747026767, "learning_rate": 1.9915711434475416e-05, "loss": 0.7688114643096924, "step": 508 }, { "epoch": 0.17938325991189427, "grad_norm": 1.41173691792053, "learning_rate": 1.9914954863982106e-05, "loss": 0.820112943649292, "step": 509 }, { "epoch": 0.17973568281938326, "grad_norm": 1.2372115494246672, "learning_rate": 1.9914194927670186e-05, "loss": 0.6393542289733887, "step": 510 }, { "epoch": 0.18008810572687226, "grad_norm": 1.5514274082803117, "learning_rate": 1.991343162579763e-05, "loss": 0.9463154673576355, "step": 511 }, { "epoch": 0.18044052863436125, "grad_norm": 1.2818287593652882, "learning_rate": 1.9912664958623556e-05, "loss": 0.9498215913772583, "step": 512 }, { "epoch": 0.1807929515418502, "grad_norm": 1.3538150363158374, "learning_rate": 1.991189492640822e-05, "loss": 0.7659052014350891, "step": 513 }, { "epoch": 0.1811453744493392, "grad_norm": 1.3014303918670855, "learning_rate": 1.9911121529413028e-05, "loss": 0.9946317672729492, "step": 514 }, { "epoch": 0.1814977973568282, "grad_norm": 1.2888096801517381, "learning_rate": 1.991034476790052e-05, "loss": 0.762086033821106, "step": 515 }, { "epoch": 0.18185022026431719, "grad_norm": 1.2685969775930512, "learning_rate": 1.990956464213438e-05, "loss": 0.7507720589637756, "step": 516 }, { "epoch": 0.18220264317180618, "grad_norm": 1.2567492686992259, "learning_rate": 1.990878115237945e-05, "loss": 0.7859716415405273, "step": 517 }, { "epoch": 0.18255506607929514, "grad_norm": 1.3199744761398897, "learning_rate": 1.9907994298901688e-05, "loss": 0.8585234880447388, "step": 518 }, { "epoch": 0.18290748898678413, "grad_norm": 1.2014345702103446, "learning_rate": 1.990720408196821e-05, "loss": 0.8569823503494263, "step": 519 }, { "epoch": 0.18325991189427313, "grad_norm": 1.4066812868889107, "learning_rate": 1.990641050184727e-05, "loss": 0.8297367095947266, "step": 520 }, { "epoch": 0.18361233480176212, "grad_norm": 1.4158335601181062, "learning_rate": 1.9905613558808262e-05, "loss": 0.7918041348457336, "step": 521 }, { "epoch": 0.1839647577092511, "grad_norm": 1.3066639133280875, "learning_rate": 1.9904813253121727e-05, "loss": 0.8322931528091431, "step": 522 }, { "epoch": 0.1843171806167401, "grad_norm": 1.600997340162295, "learning_rate": 1.990400958505934e-05, "loss": 0.6822292804718018, "step": 523 }, { "epoch": 0.18466960352422906, "grad_norm": 1.344951810567012, "learning_rate": 1.9903202554893925e-05, "loss": 0.8989835977554321, "step": 524 }, { "epoch": 0.18502202643171806, "grad_norm": 1.4644963211452282, "learning_rate": 1.990239216289944e-05, "loss": 0.671294093132019, "step": 525 }, { "epoch": 0.18537444933920705, "grad_norm": 1.3104672306859468, "learning_rate": 1.990157840935099e-05, "loss": 0.9045379161834717, "step": 526 }, { "epoch": 0.18572687224669604, "grad_norm": 1.2000125993399395, "learning_rate": 1.990076129452482e-05, "loss": 0.7117471694946289, "step": 527 }, { "epoch": 0.18607929515418503, "grad_norm": 1.406356072194557, "learning_rate": 1.9899940818698315e-05, "loss": 0.890752911567688, "step": 528 }, { "epoch": 0.186431718061674, "grad_norm": 1.3199977159633904, "learning_rate": 1.9899116982149994e-05, "loss": 0.7209222316741943, "step": 529 }, { "epoch": 0.186784140969163, "grad_norm": 1.4346812218183875, "learning_rate": 1.9898289785159534e-05, "loss": 0.6912863254547119, "step": 530 }, { "epoch": 0.18713656387665198, "grad_norm": 1.4271479463954384, "learning_rate": 1.9897459228007736e-05, "loss": 0.7060319185256958, "step": 531 }, { "epoch": 0.18748898678414097, "grad_norm": 1.2685161281492263, "learning_rate": 1.9896625310976553e-05, "loss": 0.6975364685058594, "step": 532 }, { "epoch": 0.18784140969162996, "grad_norm": 1.513572022269192, "learning_rate": 1.989578803434907e-05, "loss": 0.8576006293296814, "step": 533 }, { "epoch": 0.18819383259911895, "grad_norm": 1.3324082350150075, "learning_rate": 1.9894947398409516e-05, "loss": 0.7182095646858215, "step": 534 }, { "epoch": 0.18854625550660792, "grad_norm": 1.4808328503550712, "learning_rate": 1.9894103403443265e-05, "loss": 0.7546031475067139, "step": 535 }, { "epoch": 0.1888986784140969, "grad_norm": 1.4412494852286755, "learning_rate": 1.9893256049736824e-05, "loss": 0.7083312273025513, "step": 536 }, { "epoch": 0.1892511013215859, "grad_norm": 1.178231291011438, "learning_rate": 1.9892405337577846e-05, "loss": 0.5614915490150452, "step": 537 }, { "epoch": 0.1896035242290749, "grad_norm": 1.440292679191453, "learning_rate": 1.9891551267255114e-05, "loss": 0.7647485733032227, "step": 538 }, { "epoch": 0.18995594713656389, "grad_norm": 1.2459192275692494, "learning_rate": 1.9890693839058566e-05, "loss": 0.776042103767395, "step": 539 }, { "epoch": 0.19030837004405288, "grad_norm": 1.4553672353845373, "learning_rate": 1.9889833053279268e-05, "loss": 0.7694810628890991, "step": 540 }, { "epoch": 0.19066079295154184, "grad_norm": 1.3521577159056863, "learning_rate": 1.9888968910209433e-05, "loss": 0.6935995817184448, "step": 541 }, { "epoch": 0.19101321585903083, "grad_norm": 1.7084028073476007, "learning_rate": 1.988810141014241e-05, "loss": 0.7538039088249207, "step": 542 }, { "epoch": 0.19136563876651982, "grad_norm": 1.305324041919721, "learning_rate": 1.9887230553372686e-05, "loss": 0.8149158954620361, "step": 543 }, { "epoch": 0.19171806167400882, "grad_norm": 1.6378712299065388, "learning_rate": 1.988635634019589e-05, "loss": 0.7776780128479004, "step": 544 }, { "epoch": 0.1920704845814978, "grad_norm": 1.48919568324374, "learning_rate": 1.9885478770908793e-05, "loss": 0.8527307510375977, "step": 545 }, { "epoch": 0.19242290748898677, "grad_norm": 1.1804269388923583, "learning_rate": 1.98845978458093e-05, "loss": 0.7239484190940857, "step": 546 }, { "epoch": 0.19277533039647576, "grad_norm": 1.4096556213691402, "learning_rate": 1.9883713565196462e-05, "loss": 0.6937836408615112, "step": 547 }, { "epoch": 0.19312775330396476, "grad_norm": 1.3705456771921078, "learning_rate": 1.9882825929370456e-05, "loss": 0.8567923903465271, "step": 548 }, { "epoch": 0.19348017621145375, "grad_norm": 1.2851787163283013, "learning_rate": 1.9881934938632615e-05, "loss": 0.7948861122131348, "step": 549 }, { "epoch": 0.19383259911894274, "grad_norm": 1.2883171824741761, "learning_rate": 1.9881040593285398e-05, "loss": 0.6808983087539673, "step": 550 }, { "epoch": 0.19418502202643173, "grad_norm": 1.349988333670182, "learning_rate": 1.9880142893632412e-05, "loss": 0.9089908599853516, "step": 551 }, { "epoch": 0.1945374449339207, "grad_norm": 1.373721278775904, "learning_rate": 1.9879241839978393e-05, "loss": 0.7947918176651001, "step": 552 }, { "epoch": 0.1948898678414097, "grad_norm": 1.1679807856929723, "learning_rate": 1.9878337432629224e-05, "loss": 0.880418598651886, "step": 553 }, { "epoch": 0.19524229074889868, "grad_norm": 1.4208435552970164, "learning_rate": 1.9877429671891917e-05, "loss": 0.8845832347869873, "step": 554 }, { "epoch": 0.19559471365638767, "grad_norm": 1.4905396338040395, "learning_rate": 1.9876518558074638e-05, "loss": 0.7635341286659241, "step": 555 }, { "epoch": 0.19594713656387666, "grad_norm": 1.3799865187677636, "learning_rate": 1.9875604091486678e-05, "loss": 0.9301069974899292, "step": 556 }, { "epoch": 0.19629955947136563, "grad_norm": 1.255573262915276, "learning_rate": 1.9874686272438467e-05, "loss": 0.8788589239120483, "step": 557 }, { "epoch": 0.19665198237885462, "grad_norm": 1.2418539833380446, "learning_rate": 1.987376510124158e-05, "loss": 0.7452565431594849, "step": 558 }, { "epoch": 0.1970044052863436, "grad_norm": 1.5278788646328887, "learning_rate": 1.9872840578208722e-05, "loss": 0.819628119468689, "step": 559 }, { "epoch": 0.1973568281938326, "grad_norm": 1.4844496784402743, "learning_rate": 1.9871912703653744e-05, "loss": 0.7807571291923523, "step": 560 }, { "epoch": 0.1977092511013216, "grad_norm": 1.2611555469759475, "learning_rate": 1.9870981477891626e-05, "loss": 0.7091392278671265, "step": 561 }, { "epoch": 0.19806167400881058, "grad_norm": 1.2433638561435678, "learning_rate": 1.9870046901238496e-05, "loss": 0.8174105882644653, "step": 562 }, { "epoch": 0.19841409691629955, "grad_norm": 1.2352337461151273, "learning_rate": 1.9869108974011607e-05, "loss": 0.696865439414978, "step": 563 }, { "epoch": 0.19876651982378854, "grad_norm": 1.4794543945089762, "learning_rate": 1.986816769652936e-05, "loss": 0.914303183555603, "step": 564 }, { "epoch": 0.19911894273127753, "grad_norm": 1.2869835497381619, "learning_rate": 1.986722306911129e-05, "loss": 0.8397856950759888, "step": 565 }, { "epoch": 0.19947136563876652, "grad_norm": 1.216082157504287, "learning_rate": 1.9866275092078066e-05, "loss": 0.7206380367279053, "step": 566 }, { "epoch": 0.19982378854625552, "grad_norm": 1.3259146958291776, "learning_rate": 1.98653237657515e-05, "loss": 0.7017316818237305, "step": 567 }, { "epoch": 0.2001762114537445, "grad_norm": 1.6365100661152858, "learning_rate": 1.9864369090454538e-05, "loss": 0.8797772526741028, "step": 568 }, { "epoch": 0.20052863436123347, "grad_norm": 1.3948984288943356, "learning_rate": 1.9863411066511257e-05, "loss": 0.6643391847610474, "step": 569 }, { "epoch": 0.20088105726872246, "grad_norm": 1.4631143705399865, "learning_rate": 1.9862449694246878e-05, "loss": 0.8662393093109131, "step": 570 }, { "epoch": 0.20123348017621145, "grad_norm": 1.4103722629610054, "learning_rate": 1.9861484973987762e-05, "loss": 0.7766140699386597, "step": 571 }, { "epoch": 0.20158590308370045, "grad_norm": 1.4422501075340284, "learning_rate": 1.9860516906061397e-05, "loss": 0.8582239151000977, "step": 572 }, { "epoch": 0.20193832599118944, "grad_norm": 1.2359229208879663, "learning_rate": 1.9859545490796414e-05, "loss": 0.5838385820388794, "step": 573 }, { "epoch": 0.2022907488986784, "grad_norm": 1.4256083108556754, "learning_rate": 1.9858570728522573e-05, "loss": 0.6715164184570312, "step": 574 }, { "epoch": 0.2026431718061674, "grad_norm": 1.604413564730453, "learning_rate": 1.9857592619570783e-05, "loss": 0.7665218114852905, "step": 575 }, { "epoch": 0.20299559471365639, "grad_norm": 1.3992633216102752, "learning_rate": 1.985661116427308e-05, "loss": 0.8060458898544312, "step": 576 }, { "epoch": 0.20334801762114538, "grad_norm": 1.3647027340900928, "learning_rate": 1.985562636296264e-05, "loss": 0.8354060649871826, "step": 577 }, { "epoch": 0.20370044052863437, "grad_norm": 1.61178503454425, "learning_rate": 1.985463821597376e-05, "loss": 0.8814351558685303, "step": 578 }, { "epoch": 0.20405286343612336, "grad_norm": 1.3581614903846795, "learning_rate": 1.9853646723641895e-05, "loss": 0.9068918228149414, "step": 579 }, { "epoch": 0.20440528634361232, "grad_norm": 1.4217775001953692, "learning_rate": 1.9852651886303624e-05, "loss": 0.7671997547149658, "step": 580 }, { "epoch": 0.20475770925110132, "grad_norm": 1.2987191699893856, "learning_rate": 1.9851653704296664e-05, "loss": 0.7906886339187622, "step": 581 }, { "epoch": 0.2051101321585903, "grad_norm": 1.4550942850887114, "learning_rate": 1.985065217795987e-05, "loss": 0.8424232006072998, "step": 582 }, { "epoch": 0.2054625550660793, "grad_norm": 1.2767538498679667, "learning_rate": 1.984964730763322e-05, "loss": 0.8335819244384766, "step": 583 }, { "epoch": 0.2058149779735683, "grad_norm": 1.2913652769028938, "learning_rate": 1.9848639093657844e-05, "loss": 0.8340694308280945, "step": 584 }, { "epoch": 0.20616740088105726, "grad_norm": 1.3161255240413319, "learning_rate": 1.9847627536376e-05, "loss": 0.9228274822235107, "step": 585 }, { "epoch": 0.20651982378854625, "grad_norm": 1.548405161064148, "learning_rate": 1.984661263613107e-05, "loss": 0.7843449115753174, "step": 586 }, { "epoch": 0.20687224669603524, "grad_norm": 1.3039537503613003, "learning_rate": 1.9845594393267594e-05, "loss": 0.7411990165710449, "step": 587 }, { "epoch": 0.20722466960352423, "grad_norm": 1.3644443695047568, "learning_rate": 1.9844572808131228e-05, "loss": 0.7520540356636047, "step": 588 }, { "epoch": 0.20757709251101322, "grad_norm": 1.2894133104841217, "learning_rate": 1.9843547881068763e-05, "loss": 0.795365571975708, "step": 589 }, { "epoch": 0.20792951541850221, "grad_norm": 1.280356655308606, "learning_rate": 1.984251961242814e-05, "loss": 0.8415528535842896, "step": 590 }, { "epoch": 0.20828193832599118, "grad_norm": 1.4654647998731167, "learning_rate": 1.9841488002558416e-05, "loss": 0.8555570244789124, "step": 591 }, { "epoch": 0.20863436123348017, "grad_norm": 1.314593410908928, "learning_rate": 1.9840453051809792e-05, "loss": 0.8214600086212158, "step": 592 }, { "epoch": 0.20898678414096916, "grad_norm": 1.2598900623176714, "learning_rate": 1.9839414760533607e-05, "loss": 0.7746415138244629, "step": 593 }, { "epoch": 0.20933920704845815, "grad_norm": 1.6285440778435663, "learning_rate": 1.9838373129082325e-05, "loss": 1.0861419439315796, "step": 594 }, { "epoch": 0.20969162995594715, "grad_norm": 1.327372383451943, "learning_rate": 1.9837328157809547e-05, "loss": 0.7530953884124756, "step": 595 }, { "epoch": 0.21004405286343614, "grad_norm": 1.420023169388647, "learning_rate": 1.9836279847070004e-05, "loss": 0.8811959624290466, "step": 596 }, { "epoch": 0.2103964757709251, "grad_norm": 1.2274254083036087, "learning_rate": 1.9835228197219573e-05, "loss": 0.7956523299217224, "step": 597 }, { "epoch": 0.2107488986784141, "grad_norm": 1.306015861681406, "learning_rate": 1.9834173208615253e-05, "loss": 0.8710414171218872, "step": 598 }, { "epoch": 0.21110132158590308, "grad_norm": 1.303850147164254, "learning_rate": 1.983311488161518e-05, "loss": 0.9057297706604004, "step": 599 }, { "epoch": 0.21145374449339208, "grad_norm": 1.2517049783711822, "learning_rate": 1.983205321657862e-05, "loss": 0.7531988024711609, "step": 600 }, { "epoch": 0.21180616740088107, "grad_norm": 1.392455719061042, "learning_rate": 1.983098821386598e-05, "loss": 0.6508063077926636, "step": 601 }, { "epoch": 0.21215859030837003, "grad_norm": 1.238668234857589, "learning_rate": 1.9829919873838796e-05, "loss": 0.7267025709152222, "step": 602 }, { "epoch": 0.21251101321585902, "grad_norm": 1.2232739877442529, "learning_rate": 1.9828848196859727e-05, "loss": 0.6930510997772217, "step": 603 }, { "epoch": 0.21286343612334802, "grad_norm": 1.4104259448916805, "learning_rate": 1.9827773183292583e-05, "loss": 0.7613120079040527, "step": 604 }, { "epoch": 0.213215859030837, "grad_norm": 1.2586328753898472, "learning_rate": 1.9826694833502295e-05, "loss": 0.763299822807312, "step": 605 }, { "epoch": 0.213568281938326, "grad_norm": 1.4431352363644856, "learning_rate": 1.9825613147854928e-05, "loss": 0.7599194049835205, "step": 606 }, { "epoch": 0.213920704845815, "grad_norm": 1.3487971590690426, "learning_rate": 1.9824528126717687e-05, "loss": 0.869399905204773, "step": 607 }, { "epoch": 0.21427312775330395, "grad_norm": 1.3853231700631432, "learning_rate": 1.9823439770458893e-05, "loss": 0.733409583568573, "step": 608 }, { "epoch": 0.21462555066079295, "grad_norm": 1.2766333009964275, "learning_rate": 1.9822348079448014e-05, "loss": 0.8302386999130249, "step": 609 }, { "epoch": 0.21497797356828194, "grad_norm": 1.1872454682531661, "learning_rate": 1.9821253054055645e-05, "loss": 0.8234561681747437, "step": 610 }, { "epoch": 0.21533039647577093, "grad_norm": 1.336729476582052, "learning_rate": 1.9820154694653514e-05, "loss": 0.81988525390625, "step": 611 }, { "epoch": 0.21568281938325992, "grad_norm": 1.1619766622665528, "learning_rate": 1.9819053001614478e-05, "loss": 0.6437678933143616, "step": 612 }, { "epoch": 0.21603524229074889, "grad_norm": 1.398835884660331, "learning_rate": 1.9817947975312527e-05, "loss": 0.8256562948226929, "step": 613 }, { "epoch": 0.21638766519823788, "grad_norm": 1.4423824320045469, "learning_rate": 1.9816839616122787e-05, "loss": 0.8204725980758667, "step": 614 }, { "epoch": 0.21674008810572687, "grad_norm": 1.4648639859051293, "learning_rate": 1.9815727924421507e-05, "loss": 0.7492775917053223, "step": 615 }, { "epoch": 0.21709251101321586, "grad_norm": 1.4585481343848268, "learning_rate": 1.9814612900586075e-05, "loss": 0.629736065864563, "step": 616 }, { "epoch": 0.21744493392070485, "grad_norm": 1.3908853161597456, "learning_rate": 1.9813494544995e-05, "loss": 0.7974159717559814, "step": 617 }, { "epoch": 0.21779735682819384, "grad_norm": 1.4158223772493663, "learning_rate": 1.981237285802794e-05, "loss": 0.8367668390274048, "step": 618 }, { "epoch": 0.2181497797356828, "grad_norm": 1.2051753792883582, "learning_rate": 1.9811247840065667e-05, "loss": 0.7942521572113037, "step": 619 }, { "epoch": 0.2185022026431718, "grad_norm": 1.3827277102573685, "learning_rate": 1.981011949149009e-05, "loss": 0.7863545417785645, "step": 620 }, { "epoch": 0.2188546255506608, "grad_norm": 1.3809343727942922, "learning_rate": 1.9808987812684247e-05, "loss": 0.8667019605636597, "step": 621 }, { "epoch": 0.21920704845814978, "grad_norm": 1.5738475739563456, "learning_rate": 1.9807852804032306e-05, "loss": 0.8555353283882141, "step": 622 }, { "epoch": 0.21955947136563878, "grad_norm": 1.244926951925701, "learning_rate": 1.9806714465919573e-05, "loss": 0.8170013427734375, "step": 623 }, { "epoch": 0.21991189427312777, "grad_norm": 1.331256668600172, "learning_rate": 1.9805572798732475e-05, "loss": 0.9277342557907104, "step": 624 }, { "epoch": 0.22026431718061673, "grad_norm": 1.4090219105247375, "learning_rate": 1.980442780285857e-05, "loss": 0.6536964178085327, "step": 625 }, { "epoch": 0.22061674008810572, "grad_norm": 1.4088256669280743, "learning_rate": 1.980327947868655e-05, "loss": 0.7197799682617188, "step": 626 }, { "epoch": 0.22096916299559471, "grad_norm": 1.1381025512945977, "learning_rate": 1.980212782660624e-05, "loss": 0.7558401823043823, "step": 627 }, { "epoch": 0.2213215859030837, "grad_norm": 1.4031284519802554, "learning_rate": 1.9800972847008586e-05, "loss": 0.7918291091918945, "step": 628 }, { "epoch": 0.2216740088105727, "grad_norm": 1.4810910878326864, "learning_rate": 1.979981454028567e-05, "loss": 0.7159492373466492, "step": 629 }, { "epoch": 0.22202643171806166, "grad_norm": 1.5418605472416471, "learning_rate": 1.9798652906830694e-05, "loss": 0.854686439037323, "step": 630 }, { "epoch": 0.22237885462555065, "grad_norm": 1.6329149097762432, "learning_rate": 1.9797487947038007e-05, "loss": 0.736785888671875, "step": 631 }, { "epoch": 0.22273127753303965, "grad_norm": 1.2749674694710476, "learning_rate": 1.9796319661303065e-05, "loss": 0.7092996835708618, "step": 632 }, { "epoch": 0.22308370044052864, "grad_norm": 1.4592836621170417, "learning_rate": 1.9795148050022477e-05, "loss": 0.8890455961227417, "step": 633 }, { "epoch": 0.22343612334801763, "grad_norm": 1.2618947600836363, "learning_rate": 1.979397311359396e-05, "loss": 0.7476855516433716, "step": 634 }, { "epoch": 0.22378854625550662, "grad_norm": 1.4307363207113668, "learning_rate": 1.979279485241637e-05, "loss": 0.7810029983520508, "step": 635 }, { "epoch": 0.22414096916299558, "grad_norm": 1.2070666788938549, "learning_rate": 1.9791613266889688e-05, "loss": 0.6679891347885132, "step": 636 }, { "epoch": 0.22449339207048458, "grad_norm": 1.6320710320094325, "learning_rate": 1.979042835741503e-05, "loss": 0.809790849685669, "step": 637 }, { "epoch": 0.22484581497797357, "grad_norm": 1.6737967848633384, "learning_rate": 1.9789240124394638e-05, "loss": 0.8347213268280029, "step": 638 }, { "epoch": 0.22519823788546256, "grad_norm": 1.1935958187808327, "learning_rate": 1.9788048568231875e-05, "loss": 0.6620997190475464, "step": 639 }, { "epoch": 0.22555066079295155, "grad_norm": 1.2898316066784317, "learning_rate": 1.9786853689331235e-05, "loss": 0.7727694511413574, "step": 640 }, { "epoch": 0.22590308370044052, "grad_norm": 1.2854878709867101, "learning_rate": 1.9785655488098348e-05, "loss": 0.7433278560638428, "step": 641 }, { "epoch": 0.2262555066079295, "grad_norm": 1.3523753090224933, "learning_rate": 1.9784453964939966e-05, "loss": 0.7375571727752686, "step": 642 }, { "epoch": 0.2266079295154185, "grad_norm": 1.3285668366741343, "learning_rate": 1.9783249120263962e-05, "loss": 0.5838407874107361, "step": 643 }, { "epoch": 0.2269603524229075, "grad_norm": 1.3906475095958148, "learning_rate": 1.978204095447935e-05, "loss": 0.7120088934898376, "step": 644 }, { "epoch": 0.22731277533039648, "grad_norm": 1.5058740006044322, "learning_rate": 1.9780829467996262e-05, "loss": 0.7668102383613586, "step": 645 }, { "epoch": 0.22766519823788547, "grad_norm": 1.2993959173766831, "learning_rate": 1.977961466122596e-05, "loss": 0.748942494392395, "step": 646 }, { "epoch": 0.22801762114537444, "grad_norm": 1.476253609353715, "learning_rate": 1.9778396534580836e-05, "loss": 0.7569374442100525, "step": 647 }, { "epoch": 0.22837004405286343, "grad_norm": 1.352884217242173, "learning_rate": 1.97771750884744e-05, "loss": 0.7981363534927368, "step": 648 }, { "epoch": 0.22872246696035242, "grad_norm": 1.5069792289976334, "learning_rate": 1.97759503233213e-05, "loss": 0.7501301765441895, "step": 649 }, { "epoch": 0.2290748898678414, "grad_norm": 1.4079968546467614, "learning_rate": 1.9774722239537305e-05, "loss": 0.7880003452301025, "step": 650 }, { "epoch": 0.2294273127753304, "grad_norm": 1.3141024886679253, "learning_rate": 1.977349083753931e-05, "loss": 0.9007930755615234, "step": 651 }, { "epoch": 0.2297797356828194, "grad_norm": 1.1634171776911992, "learning_rate": 1.9772256117745335e-05, "loss": 0.6291126012802124, "step": 652 }, { "epoch": 0.23013215859030836, "grad_norm": 1.1487631323898542, "learning_rate": 1.9771018080574534e-05, "loss": 0.8155031204223633, "step": 653 }, { "epoch": 0.23048458149779735, "grad_norm": 1.2941785819245946, "learning_rate": 1.976977672644718e-05, "loss": 0.7103240489959717, "step": 654 }, { "epoch": 0.23083700440528634, "grad_norm": 1.4170836267106273, "learning_rate": 1.9768532055784678e-05, "loss": 0.8590278625488281, "step": 655 }, { "epoch": 0.23118942731277534, "grad_norm": 1.6156852038452685, "learning_rate": 1.9767284069009545e-05, "loss": 0.7729001641273499, "step": 656 }, { "epoch": 0.23154185022026433, "grad_norm": 1.543950265697803, "learning_rate": 1.9766032766545445e-05, "loss": 0.8287409543991089, "step": 657 }, { "epoch": 0.2318942731277533, "grad_norm": 1.327581925526745, "learning_rate": 1.9764778148817147e-05, "loss": 0.8651477098464966, "step": 658 }, { "epoch": 0.23224669603524228, "grad_norm": 1.3954780395501065, "learning_rate": 1.976352021625056e-05, "loss": 0.7582576274871826, "step": 659 }, { "epoch": 0.23259911894273128, "grad_norm": 1.2510605377459358, "learning_rate": 1.976225896927271e-05, "loss": 0.6579675078392029, "step": 660 }, { "epoch": 0.23295154185022027, "grad_norm": 1.4130234326235036, "learning_rate": 1.9760994408311757e-05, "loss": 0.8817700147628784, "step": 661 }, { "epoch": 0.23330396475770926, "grad_norm": 1.3799441341137708, "learning_rate": 1.9759726533796976e-05, "loss": 0.7241606712341309, "step": 662 }, { "epoch": 0.23365638766519825, "grad_norm": 1.2880787484904483, "learning_rate": 1.9758455346158768e-05, "loss": 0.7434183359146118, "step": 663 }, { "epoch": 0.23400881057268721, "grad_norm": 1.3406860649308125, "learning_rate": 1.9757180845828663e-05, "loss": 0.632422685623169, "step": 664 }, { "epoch": 0.2343612334801762, "grad_norm": 1.394213400542702, "learning_rate": 1.9755903033239318e-05, "loss": 0.7276040315628052, "step": 665 }, { "epoch": 0.2347136563876652, "grad_norm": 1.4191729622512466, "learning_rate": 1.975462190882451e-05, "loss": 0.8070325255393982, "step": 666 }, { "epoch": 0.2350660792951542, "grad_norm": 1.505939347053283, "learning_rate": 1.9753337473019133e-05, "loss": 0.867915689945221, "step": 667 }, { "epoch": 0.23541850220264318, "grad_norm": 1.2080841146883634, "learning_rate": 1.9752049726259223e-05, "loss": 0.7905307412147522, "step": 668 }, { "epoch": 0.23577092511013215, "grad_norm": 1.3166867899458456, "learning_rate": 1.9750758668981925e-05, "loss": 0.7721420526504517, "step": 669 }, { "epoch": 0.23612334801762114, "grad_norm": 1.3746426458674128, "learning_rate": 1.9749464301625515e-05, "loss": 0.7926005125045776, "step": 670 }, { "epoch": 0.23647577092511013, "grad_norm": 1.387001164209418, "learning_rate": 1.974816662462939e-05, "loss": 0.7651785612106323, "step": 671 }, { "epoch": 0.23682819383259912, "grad_norm": 1.3285492717471519, "learning_rate": 1.974686563843407e-05, "loss": 0.7548795938491821, "step": 672 }, { "epoch": 0.2371806167400881, "grad_norm": 1.256836928643264, "learning_rate": 1.9745561343481197e-05, "loss": 0.5405399799346924, "step": 673 }, { "epoch": 0.2375330396475771, "grad_norm": 1.429166434081011, "learning_rate": 1.9744253740213542e-05, "loss": 0.7561137080192566, "step": 674 }, { "epoch": 0.23788546255506607, "grad_norm": 1.2880562459402407, "learning_rate": 1.9742942829074993e-05, "loss": 0.8809534907341003, "step": 675 }, { "epoch": 0.23823788546255506, "grad_norm": 1.4170174919214424, "learning_rate": 1.974162861051057e-05, "loss": 0.750350832939148, "step": 676 }, { "epoch": 0.23859030837004405, "grad_norm": 1.629083058939835, "learning_rate": 1.9740311084966398e-05, "loss": 0.89476478099823, "step": 677 }, { "epoch": 0.23894273127753304, "grad_norm": 1.2576348651951754, "learning_rate": 1.9738990252889748e-05, "loss": 0.8647176027297974, "step": 678 }, { "epoch": 0.23929515418502204, "grad_norm": 1.4086313229573832, "learning_rate": 1.9737666114728996e-05, "loss": 0.7331727743148804, "step": 679 }, { "epoch": 0.239647577092511, "grad_norm": 1.471872239566745, "learning_rate": 1.9736338670933642e-05, "loss": 0.7714364528656006, "step": 680 }, { "epoch": 0.24, "grad_norm": 1.2246586432486557, "learning_rate": 1.973500792195432e-05, "loss": 0.7840908765792847, "step": 681 }, { "epoch": 0.24035242290748898, "grad_norm": 1.5714154435783916, "learning_rate": 1.9733673868242767e-05, "loss": 0.8723878860473633, "step": 682 }, { "epoch": 0.24070484581497797, "grad_norm": 1.3325473695906174, "learning_rate": 1.9732336510251864e-05, "loss": 0.782090425491333, "step": 683 }, { "epoch": 0.24105726872246697, "grad_norm": 1.4114017797446734, "learning_rate": 1.9730995848435594e-05, "loss": 0.8000990152359009, "step": 684 }, { "epoch": 0.24140969162995596, "grad_norm": 1.2098442989857856, "learning_rate": 1.9729651883249075e-05, "loss": 0.7499237060546875, "step": 685 }, { "epoch": 0.24176211453744492, "grad_norm": 1.376086425817015, "learning_rate": 1.972830461514854e-05, "loss": 0.8786858916282654, "step": 686 }, { "epoch": 0.2421145374449339, "grad_norm": 1.2058295584451697, "learning_rate": 1.972695404459134e-05, "loss": 0.7039557695388794, "step": 687 }, { "epoch": 0.2424669603524229, "grad_norm": 1.2391412724176054, "learning_rate": 1.9725600172035962e-05, "loss": 0.6699448823928833, "step": 688 }, { "epoch": 0.2428193832599119, "grad_norm": 1.4984585662906706, "learning_rate": 1.9724242997941995e-05, "loss": 0.6753977537155151, "step": 689 }, { "epoch": 0.2431718061674009, "grad_norm": 1.465232022987203, "learning_rate": 1.9722882522770163e-05, "loss": 0.7139854431152344, "step": 690 }, { "epoch": 0.24352422907488988, "grad_norm": 1.2814158831499989, "learning_rate": 1.9721518746982296e-05, "loss": 0.7894896864891052, "step": 691 }, { "epoch": 0.24387665198237884, "grad_norm": 1.2615077213285395, "learning_rate": 1.972015167104136e-05, "loss": 0.5663755536079407, "step": 692 }, { "epoch": 0.24422907488986784, "grad_norm": 2.058599574246893, "learning_rate": 1.971878129541144e-05, "loss": 0.8607856035232544, "step": 693 }, { "epoch": 0.24458149779735683, "grad_norm": 1.351791839280567, "learning_rate": 1.9717407620557724e-05, "loss": 0.7384383678436279, "step": 694 }, { "epoch": 0.24493392070484582, "grad_norm": 1.3580988060863546, "learning_rate": 1.971603064694654e-05, "loss": 0.6145502328872681, "step": 695 }, { "epoch": 0.2452863436123348, "grad_norm": 1.216736398001555, "learning_rate": 1.9714650375045328e-05, "loss": 0.6758620738983154, "step": 696 }, { "epoch": 0.24563876651982378, "grad_norm": 1.4471588548341505, "learning_rate": 1.9713266805322643e-05, "loss": 0.7416598200798035, "step": 697 }, { "epoch": 0.24599118942731277, "grad_norm": 1.5476710427855191, "learning_rate": 1.9711879938248163e-05, "loss": 0.7603555917739868, "step": 698 }, { "epoch": 0.24634361233480176, "grad_norm": 1.442293220466076, "learning_rate": 1.9710489774292692e-05, "loss": 0.9119949340820312, "step": 699 }, { "epoch": 0.24669603524229075, "grad_norm": 1.3843099449438452, "learning_rate": 1.9709096313928144e-05, "loss": 0.6884537935256958, "step": 700 }, { "epoch": 0.24704845814977974, "grad_norm": 1.618333940643818, "learning_rate": 1.9707699557627554e-05, "loss": 0.7928721904754639, "step": 701 }, { "epoch": 0.24740088105726873, "grad_norm": 1.593414442103489, "learning_rate": 1.970629950586508e-05, "loss": 0.888218104839325, "step": 702 }, { "epoch": 0.2477533039647577, "grad_norm": 1.484965940613647, "learning_rate": 1.9704896159115997e-05, "loss": 0.7949875593185425, "step": 703 }, { "epoch": 0.2481057268722467, "grad_norm": 1.5094809465076762, "learning_rate": 1.970348951785669e-05, "loss": 0.9031823873519897, "step": 704 }, { "epoch": 0.24845814977973568, "grad_norm": 1.4099687182713576, "learning_rate": 1.9702079582564682e-05, "loss": 0.636865496635437, "step": 705 }, { "epoch": 0.24881057268722467, "grad_norm": 1.5392719282626255, "learning_rate": 1.9700666353718593e-05, "loss": 0.731717586517334, "step": 706 }, { "epoch": 0.24916299559471367, "grad_norm": 1.5878589631749256, "learning_rate": 1.9699249831798172e-05, "loss": 0.7571220397949219, "step": 707 }, { "epoch": 0.24951541850220263, "grad_norm": 1.5180992539956903, "learning_rate": 1.969783001728429e-05, "loss": 0.6112762689590454, "step": 708 }, { "epoch": 0.24986784140969162, "grad_norm": 1.3651864060041954, "learning_rate": 1.9696406910658918e-05, "loss": 0.6737902164459229, "step": 709 }, { "epoch": 0.25022026431718064, "grad_norm": 1.328645038543607, "learning_rate": 1.9694980512405167e-05, "loss": 0.6525848507881165, "step": 710 }, { "epoch": 0.2505726872246696, "grad_norm": 1.302186292631501, "learning_rate": 1.9693550823007248e-05, "loss": 0.9107403755187988, "step": 711 }, { "epoch": 0.25092511013215857, "grad_norm": 1.5423262639437814, "learning_rate": 1.96921178429505e-05, "loss": 0.7373934984207153, "step": 712 }, { "epoch": 0.25127753303964756, "grad_norm": 1.4043304459804222, "learning_rate": 1.9690681572721377e-05, "loss": 0.6383399963378906, "step": 713 }, { "epoch": 0.25162995594713655, "grad_norm": 1.3203935888344693, "learning_rate": 1.9689242012807442e-05, "loss": 0.6600236296653748, "step": 714 }, { "epoch": 0.25198237885462554, "grad_norm": 1.6489156261044324, "learning_rate": 1.9687799163697386e-05, "loss": 0.9195891618728638, "step": 715 }, { "epoch": 0.25233480176211454, "grad_norm": 1.300868905936819, "learning_rate": 1.968635302588101e-05, "loss": 0.7122433185577393, "step": 716 }, { "epoch": 0.2526872246696035, "grad_norm": 1.467731789065586, "learning_rate": 1.968490359984923e-05, "loss": 0.7601606845855713, "step": 717 }, { "epoch": 0.2530396475770925, "grad_norm": 1.2967441771844141, "learning_rate": 1.9683450886094087e-05, "loss": 0.8216352462768555, "step": 718 }, { "epoch": 0.2533920704845815, "grad_norm": 1.4134852768930402, "learning_rate": 1.9681994885108727e-05, "loss": 0.8783165216445923, "step": 719 }, { "epoch": 0.2537444933920705, "grad_norm": 1.5566095938184208, "learning_rate": 1.9680535597387416e-05, "loss": 0.7323269844055176, "step": 720 }, { "epoch": 0.2540969162995595, "grad_norm": 1.6250423495927373, "learning_rate": 1.9679073023425542e-05, "loss": 0.93906170129776, "step": 721 }, { "epoch": 0.25444933920704843, "grad_norm": 1.3857164700730882, "learning_rate": 1.96776071637196e-05, "loss": 0.774397611618042, "step": 722 }, { "epoch": 0.2548017621145374, "grad_norm": 1.3653604324598565, "learning_rate": 1.9676138018767204e-05, "loss": 0.6634535789489746, "step": 723 }, { "epoch": 0.2551541850220264, "grad_norm": 1.3364894441034205, "learning_rate": 1.9674665589067082e-05, "loss": 0.7705625295639038, "step": 724 }, { "epoch": 0.2555066079295154, "grad_norm": 1.5708708799323368, "learning_rate": 1.9673189875119082e-05, "loss": 0.706364631652832, "step": 725 }, { "epoch": 0.2558590308370044, "grad_norm": 1.2599963014034798, "learning_rate": 1.9671710877424158e-05, "loss": 0.7295894622802734, "step": 726 }, { "epoch": 0.2562114537444934, "grad_norm": 1.6926806599843667, "learning_rate": 1.9670228596484383e-05, "loss": 0.8135089874267578, "step": 727 }, { "epoch": 0.2565638766519824, "grad_norm": 1.5978181657651334, "learning_rate": 1.966874303280295e-05, "loss": 0.801734209060669, "step": 728 }, { "epoch": 0.2569162995594714, "grad_norm": 1.728546952239603, "learning_rate": 1.9667254186884164e-05, "loss": 0.8405104875564575, "step": 729 }, { "epoch": 0.25726872246696036, "grad_norm": 1.2523029350782668, "learning_rate": 1.9665762059233434e-05, "loss": 0.8320014476776123, "step": 730 }, { "epoch": 0.25762114537444936, "grad_norm": 1.2667340666882572, "learning_rate": 1.96642666503573e-05, "loss": 0.8701308965682983, "step": 731 }, { "epoch": 0.25797356828193835, "grad_norm": 1.1982399130470203, "learning_rate": 1.9662767960763394e-05, "loss": 0.7980693578720093, "step": 732 }, { "epoch": 0.25832599118942734, "grad_norm": 1.3765503313855298, "learning_rate": 1.9661265990960486e-05, "loss": 0.7258214950561523, "step": 733 }, { "epoch": 0.2586784140969163, "grad_norm": 1.1683887680739682, "learning_rate": 1.9659760741458444e-05, "loss": 0.6860172748565674, "step": 734 }, { "epoch": 0.25903083700440527, "grad_norm": 1.4034749748766104, "learning_rate": 1.9658252212768252e-05, "loss": 0.7438071370124817, "step": 735 }, { "epoch": 0.25938325991189426, "grad_norm": 1.6140837506314978, "learning_rate": 1.9656740405402007e-05, "loss": 0.8680309057235718, "step": 736 }, { "epoch": 0.25973568281938325, "grad_norm": 1.5365221656010954, "learning_rate": 1.9655225319872925e-05, "loss": 0.933163046836853, "step": 737 }, { "epoch": 0.26008810572687224, "grad_norm": 1.3636194628802456, "learning_rate": 1.9653706956695333e-05, "loss": 0.8746597170829773, "step": 738 }, { "epoch": 0.26044052863436123, "grad_norm": 1.31799671460777, "learning_rate": 1.965218531638466e-05, "loss": 0.857211709022522, "step": 739 }, { "epoch": 0.2607929515418502, "grad_norm": 1.313241643085953, "learning_rate": 1.965066039945746e-05, "loss": 0.7837733030319214, "step": 740 }, { "epoch": 0.2611453744493392, "grad_norm": 1.3527479757495662, "learning_rate": 1.9649132206431395e-05, "loss": 0.8401491641998291, "step": 741 }, { "epoch": 0.2614977973568282, "grad_norm": 1.14302378839197, "learning_rate": 1.9647600737825235e-05, "loss": 0.7070307731628418, "step": 742 }, { "epoch": 0.2618502202643172, "grad_norm": 1.756317003631787, "learning_rate": 1.9646065994158873e-05, "loss": 0.7649509310722351, "step": 743 }, { "epoch": 0.2622026431718062, "grad_norm": 1.5152987231460182, "learning_rate": 1.9644527975953302e-05, "loss": 0.7759182453155518, "step": 744 }, { "epoch": 0.26255506607929513, "grad_norm": 1.5151017458848213, "learning_rate": 1.9642986683730626e-05, "loss": 0.8176295757293701, "step": 745 }, { "epoch": 0.2629074889867841, "grad_norm": 1.2974538182792636, "learning_rate": 1.9641442118014078e-05, "loss": 0.8406162261962891, "step": 746 }, { "epoch": 0.2632599118942731, "grad_norm": 1.3410871141615202, "learning_rate": 1.9639894279327985e-05, "loss": 0.8064795732498169, "step": 747 }, { "epoch": 0.2636123348017621, "grad_norm": 1.2769637989850176, "learning_rate": 1.9638343168197784e-05, "loss": 0.6662956476211548, "step": 748 }, { "epoch": 0.2639647577092511, "grad_norm": 1.5105008685571195, "learning_rate": 1.9636788785150037e-05, "loss": 0.8747783899307251, "step": 749 }, { "epoch": 0.2643171806167401, "grad_norm": 1.4261291763421449, "learning_rate": 1.9635231130712406e-05, "loss": 0.7893349528312683, "step": 750 }, { "epoch": 0.2646696035242291, "grad_norm": 1.2907133964100823, "learning_rate": 1.9633670205413665e-05, "loss": 0.7380903959274292, "step": 751 }, { "epoch": 0.26502202643171807, "grad_norm": 1.5293000163357584, "learning_rate": 1.96321060097837e-05, "loss": 0.9164873957633972, "step": 752 }, { "epoch": 0.26537444933920706, "grad_norm": 1.5448314355627197, "learning_rate": 1.9630538544353505e-05, "loss": 0.7664264440536499, "step": 753 }, { "epoch": 0.26572687224669606, "grad_norm": 1.4037068281656377, "learning_rate": 1.9628967809655187e-05, "loss": 0.8117275238037109, "step": 754 }, { "epoch": 0.26607929515418505, "grad_norm": 1.3044642797371147, "learning_rate": 1.9627393806221967e-05, "loss": 0.6203808784484863, "step": 755 }, { "epoch": 0.266431718061674, "grad_norm": 1.5462507455011187, "learning_rate": 1.9625816534588163e-05, "loss": 0.8777878284454346, "step": 756 }, { "epoch": 0.266784140969163, "grad_norm": 1.2883365910622429, "learning_rate": 1.9624235995289212e-05, "loss": 0.6984438300132751, "step": 757 }, { "epoch": 0.26713656387665197, "grad_norm": 1.5746997664717406, "learning_rate": 1.962265218886166e-05, "loss": 0.7806228399276733, "step": 758 }, { "epoch": 0.26748898678414096, "grad_norm": 1.318579751564355, "learning_rate": 1.9621065115843155e-05, "loss": 0.6924373507499695, "step": 759 }, { "epoch": 0.26784140969162995, "grad_norm": 1.2867883287922122, "learning_rate": 1.9619474776772462e-05, "loss": 0.6809841394424438, "step": 760 }, { "epoch": 0.26819383259911894, "grad_norm": 1.3766475304418688, "learning_rate": 1.961788117218945e-05, "loss": 0.8346723318099976, "step": 761 }, { "epoch": 0.26854625550660793, "grad_norm": 1.3717126814625271, "learning_rate": 1.96162843026351e-05, "loss": 0.8000205755233765, "step": 762 }, { "epoch": 0.2688986784140969, "grad_norm": 1.256040752163899, "learning_rate": 1.9614684168651504e-05, "loss": 0.8026692271232605, "step": 763 }, { "epoch": 0.2692511013215859, "grad_norm": 1.4850412299335856, "learning_rate": 1.961308077078185e-05, "loss": 0.921292781829834, "step": 764 }, { "epoch": 0.2696035242290749, "grad_norm": 1.6577133714061814, "learning_rate": 1.9611474109570446e-05, "loss": 0.8018487095832825, "step": 765 }, { "epoch": 0.2699559471365639, "grad_norm": 1.2440268554728864, "learning_rate": 1.9609864185562698e-05, "loss": 0.7400588989257812, "step": 766 }, { "epoch": 0.27030837004405284, "grad_norm": 1.2094824954459686, "learning_rate": 1.960825099930513e-05, "loss": 0.6243399977684021, "step": 767 }, { "epoch": 0.27066079295154183, "grad_norm": 1.415024134390762, "learning_rate": 1.9606634551345373e-05, "loss": 0.7680903673171997, "step": 768 }, { "epoch": 0.2710132158590308, "grad_norm": 1.3126349106428246, "learning_rate": 1.960501484223215e-05, "loss": 0.8783930540084839, "step": 769 }, { "epoch": 0.2713656387665198, "grad_norm": 1.4964756858010921, "learning_rate": 1.9603391872515308e-05, "loss": 0.7910561561584473, "step": 770 }, { "epoch": 0.2717180616740088, "grad_norm": 1.4400527227532898, "learning_rate": 1.9601765642745795e-05, "loss": 0.7325295209884644, "step": 771 }, { "epoch": 0.2720704845814978, "grad_norm": 1.3018158119605838, "learning_rate": 1.9600136153475666e-05, "loss": 0.7017170190811157, "step": 772 }, { "epoch": 0.2724229074889868, "grad_norm": 1.5395904311410002, "learning_rate": 1.959850340525808e-05, "loss": 0.9281908273696899, "step": 773 }, { "epoch": 0.2727753303964758, "grad_norm": 1.256408104414643, "learning_rate": 1.95968673986473e-05, "loss": 0.7421029806137085, "step": 774 }, { "epoch": 0.27312775330396477, "grad_norm": 1.3171523536350294, "learning_rate": 1.9595228134198708e-05, "loss": 0.7474848031997681, "step": 775 }, { "epoch": 0.27348017621145376, "grad_norm": 1.3683438241049553, "learning_rate": 1.9593585612468776e-05, "loss": 0.7267760038375854, "step": 776 }, { "epoch": 0.27383259911894275, "grad_norm": 1.4883233103137832, "learning_rate": 1.9591939834015096e-05, "loss": 0.739683985710144, "step": 777 }, { "epoch": 0.2741850220264317, "grad_norm": 1.2437408403604437, "learning_rate": 1.9590290799396353e-05, "loss": 0.6615399122238159, "step": 778 }, { "epoch": 0.2745374449339207, "grad_norm": 1.5863201035209105, "learning_rate": 1.9588638509172343e-05, "loss": 0.8045977354049683, "step": 779 }, { "epoch": 0.2748898678414097, "grad_norm": 1.5522608295626732, "learning_rate": 1.958698296390397e-05, "loss": 0.8760169744491577, "step": 780 }, { "epoch": 0.27524229074889867, "grad_norm": 1.5297761597873432, "learning_rate": 1.9585324164153236e-05, "loss": 0.6676662564277649, "step": 781 }, { "epoch": 0.27559471365638766, "grad_norm": 1.1706549585314092, "learning_rate": 1.958366211048326e-05, "loss": 0.6650630235671997, "step": 782 }, { "epoch": 0.27594713656387665, "grad_norm": 1.157826702613003, "learning_rate": 1.9581996803458248e-05, "loss": 0.7399466037750244, "step": 783 }, { "epoch": 0.27629955947136564, "grad_norm": 1.316772401506473, "learning_rate": 1.9580328243643528e-05, "loss": 0.6121753454208374, "step": 784 }, { "epoch": 0.27665198237885463, "grad_norm": 1.334268754223845, "learning_rate": 1.9578656431605515e-05, "loss": 0.8562870025634766, "step": 785 }, { "epoch": 0.2770044052863436, "grad_norm": 1.441421130314338, "learning_rate": 1.9576981367911746e-05, "loss": 0.717842161655426, "step": 786 }, { "epoch": 0.2773568281938326, "grad_norm": 1.582720247126145, "learning_rate": 1.9575303053130847e-05, "loss": 0.802294135093689, "step": 787 }, { "epoch": 0.2777092511013216, "grad_norm": 1.2234390313515955, "learning_rate": 1.957362148783256e-05, "loss": 0.6636664867401123, "step": 788 }, { "epoch": 0.2780616740088106, "grad_norm": 1.3850288575091645, "learning_rate": 1.9571936672587718e-05, "loss": 0.7177780866622925, "step": 789 }, { "epoch": 0.27841409691629954, "grad_norm": 1.6181532263095582, "learning_rate": 1.957024860796826e-05, "loss": 0.8263623714447021, "step": 790 }, { "epoch": 0.27876651982378853, "grad_norm": 1.4470113515398348, "learning_rate": 1.9568557294547244e-05, "loss": 0.7620534896850586, "step": 791 }, { "epoch": 0.2791189427312775, "grad_norm": 1.5101791429302596, "learning_rate": 1.956686273289881e-05, "loss": 0.812814474105835, "step": 792 }, { "epoch": 0.2794713656387665, "grad_norm": 1.2812553609430264, "learning_rate": 1.956516492359821e-05, "loss": 0.6494747400283813, "step": 793 }, { "epoch": 0.2798237885462555, "grad_norm": 1.3413487769011592, "learning_rate": 1.9563463867221793e-05, "loss": 0.7152044773101807, "step": 794 }, { "epoch": 0.2801762114537445, "grad_norm": 1.5718962936709213, "learning_rate": 1.956175956434702e-05, "loss": 0.7607219815254211, "step": 795 }, { "epoch": 0.2805286343612335, "grad_norm": 1.4483911078118432, "learning_rate": 1.9560052015552455e-05, "loss": 0.8793845176696777, "step": 796 }, { "epoch": 0.2808810572687225, "grad_norm": 1.509282266928049, "learning_rate": 1.9558341221417744e-05, "loss": 0.8314816951751709, "step": 797 }, { "epoch": 0.28123348017621147, "grad_norm": 1.2634078342185056, "learning_rate": 1.9556627182523656e-05, "loss": 0.8195264339447021, "step": 798 }, { "epoch": 0.28158590308370046, "grad_norm": 1.2808718319688324, "learning_rate": 1.9554909899452055e-05, "loss": 0.8079999685287476, "step": 799 }, { "epoch": 0.28193832599118945, "grad_norm": 1.6867283155032318, "learning_rate": 1.9553189372785903e-05, "loss": 0.7614034414291382, "step": 800 }, { "epoch": 0.2822907488986784, "grad_norm": 1.13179227916607, "learning_rate": 1.9551465603109263e-05, "loss": 0.6271458268165588, "step": 801 }, { "epoch": 0.2826431718061674, "grad_norm": 1.5872328004173855, "learning_rate": 1.9549738591007302e-05, "loss": 0.8061915040016174, "step": 802 }, { "epoch": 0.2829955947136564, "grad_norm": 1.3494758196376249, "learning_rate": 1.9548008337066294e-05, "loss": 0.663912296295166, "step": 803 }, { "epoch": 0.28334801762114536, "grad_norm": 1.503624867364233, "learning_rate": 1.9546274841873597e-05, "loss": 0.7582170963287354, "step": 804 }, { "epoch": 0.28370044052863436, "grad_norm": 1.3181032025931252, "learning_rate": 1.9544538106017682e-05, "loss": 0.7855465412139893, "step": 805 }, { "epoch": 0.28405286343612335, "grad_norm": 1.4462567272335825, "learning_rate": 1.9542798130088116e-05, "loss": 0.6976481676101685, "step": 806 }, { "epoch": 0.28440528634361234, "grad_norm": 1.8291330426153005, "learning_rate": 1.954105491467557e-05, "loss": 0.7678342461585999, "step": 807 }, { "epoch": 0.28475770925110133, "grad_norm": 1.2407582020259869, "learning_rate": 1.9539308460371812e-05, "loss": 0.6238858699798584, "step": 808 }, { "epoch": 0.2851101321585903, "grad_norm": 1.239785831064125, "learning_rate": 1.95375587677697e-05, "loss": 0.7756681442260742, "step": 809 }, { "epoch": 0.2854625550660793, "grad_norm": 1.462836813794646, "learning_rate": 1.953580583746321e-05, "loss": 0.8908202648162842, "step": 810 }, { "epoch": 0.2858149779735683, "grad_norm": 1.1795831445984086, "learning_rate": 1.9534049670047402e-05, "loss": 0.6769838929176331, "step": 811 }, { "epoch": 0.28616740088105724, "grad_norm": 1.2674126564024601, "learning_rate": 1.953229026611844e-05, "loss": 0.8452527523040771, "step": 812 }, { "epoch": 0.28651982378854624, "grad_norm": 1.1830287106246784, "learning_rate": 1.9530527626273592e-05, "loss": 0.7494348287582397, "step": 813 }, { "epoch": 0.2868722466960352, "grad_norm": 1.399665317775642, "learning_rate": 1.9528761751111215e-05, "loss": 0.7691028714179993, "step": 814 }, { "epoch": 0.2872246696035242, "grad_norm": 1.2077153417445161, "learning_rate": 1.9526992641230768e-05, "loss": 0.6854703426361084, "step": 815 }, { "epoch": 0.2875770925110132, "grad_norm": 1.4832887577463363, "learning_rate": 1.9525220297232815e-05, "loss": 0.7520424127578735, "step": 816 }, { "epoch": 0.2879295154185022, "grad_norm": 1.499896401145914, "learning_rate": 1.9523444719719003e-05, "loss": 0.7894444465637207, "step": 817 }, { "epoch": 0.2882819383259912, "grad_norm": 1.4246285223246848, "learning_rate": 1.952166590929209e-05, "loss": 0.7835032939910889, "step": 818 }, { "epoch": 0.2886343612334802, "grad_norm": 1.4284322886298129, "learning_rate": 1.9519883866555928e-05, "loss": 0.7932062149047852, "step": 819 }, { "epoch": 0.2889867841409692, "grad_norm": 1.3689283839888975, "learning_rate": 1.951809859211546e-05, "loss": 0.7917006015777588, "step": 820 }, { "epoch": 0.28933920704845817, "grad_norm": 1.1579342690806749, "learning_rate": 1.9516310086576734e-05, "loss": 0.5330606698989868, "step": 821 }, { "epoch": 0.28969162995594716, "grad_norm": 1.3284680872988386, "learning_rate": 1.9514518350546893e-05, "loss": 0.7243788242340088, "step": 822 }, { "epoch": 0.2900440528634361, "grad_norm": 1.5494348453743318, "learning_rate": 1.9512723384634175e-05, "loss": 0.7692278623580933, "step": 823 }, { "epoch": 0.2903964757709251, "grad_norm": 1.4784711521599085, "learning_rate": 1.9510925189447916e-05, "loss": 0.7537804841995239, "step": 824 }, { "epoch": 0.2907488986784141, "grad_norm": 1.6724318756605505, "learning_rate": 1.9509123765598545e-05, "loss": 0.9168751239776611, "step": 825 }, { "epoch": 0.2911013215859031, "grad_norm": 1.3269710873120673, "learning_rate": 1.9507319113697592e-05, "loss": 0.7863682508468628, "step": 826 }, { "epoch": 0.29145374449339206, "grad_norm": 1.3629585622585614, "learning_rate": 1.9505511234357677e-05, "loss": 0.7119239568710327, "step": 827 }, { "epoch": 0.29180616740088106, "grad_norm": 1.3362093903256012, "learning_rate": 1.950370012819252e-05, "loss": 0.6071019172668457, "step": 828 }, { "epoch": 0.29215859030837005, "grad_norm": 1.479432309492857, "learning_rate": 1.9501885795816937e-05, "loss": 0.9750580787658691, "step": 829 }, { "epoch": 0.29251101321585904, "grad_norm": 1.3797663030734688, "learning_rate": 1.9500068237846837e-05, "loss": 0.7465370297431946, "step": 830 }, { "epoch": 0.29286343612334803, "grad_norm": 1.3385246660479724, "learning_rate": 1.949824745489922e-05, "loss": 0.7821183204650879, "step": 831 }, { "epoch": 0.293215859030837, "grad_norm": 1.4671979426722186, "learning_rate": 1.949642344759219e-05, "loss": 0.7555009126663208, "step": 832 }, { "epoch": 0.293568281938326, "grad_norm": 1.4661056896012497, "learning_rate": 1.9494596216544942e-05, "loss": 0.841058075428009, "step": 833 }, { "epoch": 0.29392070484581495, "grad_norm": 1.4108089015351581, "learning_rate": 1.9492765762377762e-05, "loss": 0.737910270690918, "step": 834 }, { "epoch": 0.29427312775330394, "grad_norm": 1.430769526790491, "learning_rate": 1.9490932085712027e-05, "loss": 0.6817367076873779, "step": 835 }, { "epoch": 0.29462555066079293, "grad_norm": 1.4513808156166068, "learning_rate": 1.9489095187170218e-05, "loss": 0.6739218235015869, "step": 836 }, { "epoch": 0.2949779735682819, "grad_norm": 1.3318980758968664, "learning_rate": 1.9487255067375907e-05, "loss": 0.8632504940032959, "step": 837 }, { "epoch": 0.2953303964757709, "grad_norm": 1.4421404890889282, "learning_rate": 1.9485411726953753e-05, "loss": 0.6615850925445557, "step": 838 }, { "epoch": 0.2956828193832599, "grad_norm": 1.5159755088266125, "learning_rate": 1.9483565166529515e-05, "loss": 0.8647087812423706, "step": 839 }, { "epoch": 0.2960352422907489, "grad_norm": 1.0418453596772383, "learning_rate": 1.9481715386730044e-05, "loss": 0.5152087807655334, "step": 840 }, { "epoch": 0.2963876651982379, "grad_norm": 1.265898832717726, "learning_rate": 1.9479862388183283e-05, "loss": 0.7942806482315063, "step": 841 }, { "epoch": 0.2967400881057269, "grad_norm": 1.4728087242398047, "learning_rate": 1.947800617151826e-05, "loss": 0.6364283561706543, "step": 842 }, { "epoch": 0.2970925110132159, "grad_norm": 1.322764194763318, "learning_rate": 1.9476146737365112e-05, "loss": 0.8278179168701172, "step": 843 }, { "epoch": 0.29744493392070487, "grad_norm": 1.4629329991948483, "learning_rate": 1.9474284086355057e-05, "loss": 0.7369956970214844, "step": 844 }, { "epoch": 0.29779735682819386, "grad_norm": 1.314820640789224, "learning_rate": 1.9472418219120403e-05, "loss": 0.6879928112030029, "step": 845 }, { "epoch": 0.2981497797356828, "grad_norm": 1.357319714737328, "learning_rate": 1.9470549136294554e-05, "loss": 0.8312973976135254, "step": 846 }, { "epoch": 0.2985022026431718, "grad_norm": 1.3920317025034097, "learning_rate": 1.946867683851201e-05, "loss": 0.8102964162826538, "step": 847 }, { "epoch": 0.2988546255506608, "grad_norm": 1.502648854525568, "learning_rate": 1.9466801326408355e-05, "loss": 0.6136792898178101, "step": 848 }, { "epoch": 0.29920704845814977, "grad_norm": 1.4028028409959845, "learning_rate": 1.946492260062027e-05, "loss": 0.6388760805130005, "step": 849 }, { "epoch": 0.29955947136563876, "grad_norm": 1.2376129930975477, "learning_rate": 1.9463040661785516e-05, "loss": 0.6443628072738647, "step": 850 }, { "epoch": 0.29991189427312775, "grad_norm": 1.4811436209967876, "learning_rate": 1.9461155510542962e-05, "loss": 0.7763667702674866, "step": 851 }, { "epoch": 0.30026431718061675, "grad_norm": 1.3770267697185403, "learning_rate": 1.9459267147532555e-05, "loss": 0.8040921688079834, "step": 852 }, { "epoch": 0.30061674008810574, "grad_norm": 1.5848801035694915, "learning_rate": 1.9457375573395334e-05, "loss": 0.6271079778671265, "step": 853 }, { "epoch": 0.30096916299559473, "grad_norm": 1.4490523944014555, "learning_rate": 1.945548078877343e-05, "loss": 0.6970022916793823, "step": 854 }, { "epoch": 0.3013215859030837, "grad_norm": 1.5989299247861681, "learning_rate": 1.9453582794310063e-05, "loss": 0.8283002972602844, "step": 855 }, { "epoch": 0.3016740088105727, "grad_norm": 1.3183590658260465, "learning_rate": 1.9451681590649545e-05, "loss": 0.7989551424980164, "step": 856 }, { "epoch": 0.30202643171806165, "grad_norm": 1.6791390781024222, "learning_rate": 1.9449777178437274e-05, "loss": 0.7000687122344971, "step": 857 }, { "epoch": 0.30237885462555064, "grad_norm": 1.8022925697135672, "learning_rate": 1.944786955831974e-05, "loss": 0.8005126714706421, "step": 858 }, { "epoch": 0.30273127753303963, "grad_norm": 1.4999207234472591, "learning_rate": 1.9445958730944515e-05, "loss": 0.7060712575912476, "step": 859 }, { "epoch": 0.3030837004405286, "grad_norm": 1.4072429406012825, "learning_rate": 1.9444044696960277e-05, "loss": 0.6979726552963257, "step": 860 }, { "epoch": 0.3034361233480176, "grad_norm": 1.4515999764557612, "learning_rate": 1.9442127457016768e-05, "loss": 0.7916465401649475, "step": 861 }, { "epoch": 0.3037885462555066, "grad_norm": 1.1660322947657744, "learning_rate": 1.944020701176484e-05, "loss": 0.6980502009391785, "step": 862 }, { "epoch": 0.3041409691629956, "grad_norm": 1.7481448087127538, "learning_rate": 1.943828336185642e-05, "loss": 0.8479218482971191, "step": 863 }, { "epoch": 0.3044933920704846, "grad_norm": 1.6916771358958562, "learning_rate": 1.9436356507944532e-05, "loss": 0.8374297022819519, "step": 864 }, { "epoch": 0.3048458149779736, "grad_norm": 1.3059238044039985, "learning_rate": 1.943442645068328e-05, "loss": 0.6871248483657837, "step": 865 }, { "epoch": 0.3051982378854626, "grad_norm": 1.4668202087885096, "learning_rate": 1.9432493190727854e-05, "loss": 0.92267906665802, "step": 866 }, { "epoch": 0.30555066079295157, "grad_norm": 1.7147503992363287, "learning_rate": 1.9430556728734543e-05, "loss": 0.7068654298782349, "step": 867 }, { "epoch": 0.3059030837004405, "grad_norm": 1.354783765213683, "learning_rate": 1.942861706536071e-05, "loss": 0.830272912979126, "step": 868 }, { "epoch": 0.3062555066079295, "grad_norm": 1.5223972366721212, "learning_rate": 1.9426674201264814e-05, "loss": 0.7996113300323486, "step": 869 }, { "epoch": 0.3066079295154185, "grad_norm": 1.7576483396811688, "learning_rate": 1.9424728137106398e-05, "loss": 0.7519441843032837, "step": 870 }, { "epoch": 0.3069603524229075, "grad_norm": 1.92300313533063, "learning_rate": 1.9422778873546084e-05, "loss": 0.5812790393829346, "step": 871 }, { "epoch": 0.30731277533039647, "grad_norm": 1.058437605318741, "learning_rate": 1.9420826411245595e-05, "loss": 0.5953323841094971, "step": 872 }, { "epoch": 0.30766519823788546, "grad_norm": 1.3954802825469448, "learning_rate": 1.941887075086772e-05, "loss": 0.8307937979698181, "step": 873 }, { "epoch": 0.30801762114537445, "grad_norm": 1.5122839417773277, "learning_rate": 1.9416911893076358e-05, "loss": 0.7753443121910095, "step": 874 }, { "epoch": 0.30837004405286345, "grad_norm": 1.3848386830658772, "learning_rate": 1.9414949838536468e-05, "loss": 0.8803520798683167, "step": 875 }, { "epoch": 0.30872246696035244, "grad_norm": 1.3111930018969615, "learning_rate": 1.9412984587914115e-05, "loss": 0.6811587810516357, "step": 876 }, { "epoch": 0.30907488986784143, "grad_norm": 1.3880932208512609, "learning_rate": 1.9411016141876438e-05, "loss": 0.802099347114563, "step": 877 }, { "epoch": 0.3094273127753304, "grad_norm": 1.560285458084049, "learning_rate": 1.940904450109166e-05, "loss": 0.7325229644775391, "step": 878 }, { "epoch": 0.30977973568281936, "grad_norm": 1.5126812875374416, "learning_rate": 1.9407069666229097e-05, "loss": 0.6515973210334778, "step": 879 }, { "epoch": 0.31013215859030835, "grad_norm": 1.2990709527675965, "learning_rate": 1.9405091637959138e-05, "loss": 0.7314589619636536, "step": 880 }, { "epoch": 0.31048458149779734, "grad_norm": 1.2146229290292494, "learning_rate": 1.9403110416953267e-05, "loss": 0.6668078303337097, "step": 881 }, { "epoch": 0.31083700440528633, "grad_norm": 1.4214853985415763, "learning_rate": 1.9401126003884047e-05, "loss": 0.693236231803894, "step": 882 }, { "epoch": 0.3111894273127753, "grad_norm": 2.210010730425174, "learning_rate": 1.939913839942512e-05, "loss": 0.8242754936218262, "step": 883 }, { "epoch": 0.3115418502202643, "grad_norm": 1.4121001226290237, "learning_rate": 1.939714760425122e-05, "loss": 0.7776592373847961, "step": 884 }, { "epoch": 0.3118942731277533, "grad_norm": 1.6297557283357365, "learning_rate": 1.9395153619038158e-05, "loss": 0.7023555636405945, "step": 885 }, { "epoch": 0.3122466960352423, "grad_norm": 1.333512905730993, "learning_rate": 1.939315644446283e-05, "loss": 0.690382182598114, "step": 886 }, { "epoch": 0.3125991189427313, "grad_norm": 1.4632969046362096, "learning_rate": 1.9391156081203214e-05, "loss": 0.7590082287788391, "step": 887 }, { "epoch": 0.3129515418502203, "grad_norm": 1.3672878296080273, "learning_rate": 1.9389152529938377e-05, "loss": 0.7378168702125549, "step": 888 }, { "epoch": 0.3133039647577093, "grad_norm": 1.3616414763479574, "learning_rate": 1.938714579134845e-05, "loss": 0.7036890983581543, "step": 889 }, { "epoch": 0.3136563876651982, "grad_norm": 1.4808362954559244, "learning_rate": 1.938513586611467e-05, "loss": 0.8881829977035522, "step": 890 }, { "epoch": 0.3140088105726872, "grad_norm": 1.5370313355999317, "learning_rate": 1.9383122754919342e-05, "loss": 0.7467600107192993, "step": 891 }, { "epoch": 0.3143612334801762, "grad_norm": 1.6168044285318155, "learning_rate": 1.938110645844585e-05, "loss": 0.9358077049255371, "step": 892 }, { "epoch": 0.3147136563876652, "grad_norm": 1.3982581442164577, "learning_rate": 1.9379086977378664e-05, "loss": 0.7751256227493286, "step": 893 }, { "epoch": 0.3150660792951542, "grad_norm": 1.3071717433837386, "learning_rate": 1.9377064312403338e-05, "loss": 0.8020666837692261, "step": 894 }, { "epoch": 0.31541850220264317, "grad_norm": 1.2076526617304193, "learning_rate": 1.9375038464206507e-05, "loss": 0.7251513004302979, "step": 895 }, { "epoch": 0.31577092511013216, "grad_norm": 1.3323484110232422, "learning_rate": 1.9373009433475874e-05, "loss": 0.7163990139961243, "step": 896 }, { "epoch": 0.31612334801762115, "grad_norm": 1.7368098259899396, "learning_rate": 1.937097722090024e-05, "loss": 0.7208842039108276, "step": 897 }, { "epoch": 0.31647577092511014, "grad_norm": 1.3890083085574685, "learning_rate": 1.9368941827169475e-05, "loss": 0.7660849690437317, "step": 898 }, { "epoch": 0.31682819383259914, "grad_norm": 1.4598849060474621, "learning_rate": 1.9366903252974532e-05, "loss": 0.7017598152160645, "step": 899 }, { "epoch": 0.31718061674008813, "grad_norm": 1.1578269588811556, "learning_rate": 1.9364861499007443e-05, "loss": 0.6831692457199097, "step": 900 }, { "epoch": 0.3175330396475771, "grad_norm": 1.5110843884258551, "learning_rate": 1.936281656596132e-05, "loss": 0.6555520296096802, "step": 901 }, { "epoch": 0.31788546255506606, "grad_norm": 1.5455350998398028, "learning_rate": 1.9360768454530356e-05, "loss": 0.7401334047317505, "step": 902 }, { "epoch": 0.31823788546255505, "grad_norm": 1.445337217541868, "learning_rate": 1.935871716540982e-05, "loss": 0.7415893077850342, "step": 903 }, { "epoch": 0.31859030837004404, "grad_norm": 1.280838808592201, "learning_rate": 1.935666269929606e-05, "loss": 0.8254752159118652, "step": 904 }, { "epoch": 0.31894273127753303, "grad_norm": 1.4164353369528349, "learning_rate": 1.9354605056886505e-05, "loss": 0.708149254322052, "step": 905 }, { "epoch": 0.319295154185022, "grad_norm": 5.868993531178127, "learning_rate": 1.9352544238879654e-05, "loss": 0.8084006905555725, "step": 906 }, { "epoch": 0.319647577092511, "grad_norm": 1.264327413823813, "learning_rate": 1.93504802459751e-05, "loss": 0.8039542436599731, "step": 907 }, { "epoch": 0.32, "grad_norm": 1.3552380315038073, "learning_rate": 1.93484130788735e-05, "loss": 0.7563241720199585, "step": 908 }, { "epoch": 0.320352422907489, "grad_norm": 1.4802258000623036, "learning_rate": 1.9346342738276593e-05, "loss": 0.7972971200942993, "step": 909 }, { "epoch": 0.320704845814978, "grad_norm": 1.2978401429696003, "learning_rate": 1.93442692248872e-05, "loss": 0.6693121790885925, "step": 910 }, { "epoch": 0.321057268722467, "grad_norm": 1.567978048588056, "learning_rate": 1.9342192539409203e-05, "loss": 0.6597858667373657, "step": 911 }, { "epoch": 0.321409691629956, "grad_norm": 1.368700143265877, "learning_rate": 1.934011268254758e-05, "loss": 0.6771499514579773, "step": 912 }, { "epoch": 0.3217621145374449, "grad_norm": 1.2365987861589656, "learning_rate": 1.9338029655008375e-05, "loss": 0.6903397440910339, "step": 913 }, { "epoch": 0.3221145374449339, "grad_norm": 1.1408319382533163, "learning_rate": 1.9335943457498717e-05, "loss": 0.6287999153137207, "step": 914 }, { "epoch": 0.3224669603524229, "grad_norm": 1.6382789883498257, "learning_rate": 1.93338540907268e-05, "loss": 0.7199264764785767, "step": 915 }, { "epoch": 0.3228193832599119, "grad_norm": 1.3951711845041654, "learning_rate": 1.9331761555401896e-05, "loss": 0.6960160732269287, "step": 916 }, { "epoch": 0.3231718061674009, "grad_norm": 1.4692108732272398, "learning_rate": 1.932966585223436e-05, "loss": 0.8981958627700806, "step": 917 }, { "epoch": 0.32352422907488987, "grad_norm": 1.5685980092664367, "learning_rate": 1.932756698193562e-05, "loss": 0.786432147026062, "step": 918 }, { "epoch": 0.32387665198237886, "grad_norm": 1.5208274085752962, "learning_rate": 1.9325464945218172e-05, "loss": 0.7260904312133789, "step": 919 }, { "epoch": 0.32422907488986785, "grad_norm": 1.5076861367086136, "learning_rate": 1.9323359742795595e-05, "loss": 0.715835452079773, "step": 920 }, { "epoch": 0.32458149779735684, "grad_norm": 1.5022880591009429, "learning_rate": 1.932125137538254e-05, "loss": 0.6312157511711121, "step": 921 }, { "epoch": 0.32493392070484584, "grad_norm": 1.3825485581433186, "learning_rate": 1.931913984369473e-05, "loss": 0.7565821409225464, "step": 922 }, { "epoch": 0.3252863436123348, "grad_norm": 1.3787375139479208, "learning_rate": 1.931702514844896e-05, "loss": 0.6866531372070312, "step": 923 }, { "epoch": 0.32563876651982376, "grad_norm": 2.06933203374066, "learning_rate": 1.9314907290363117e-05, "loss": 0.879021167755127, "step": 924 }, { "epoch": 0.32599118942731276, "grad_norm": 1.4876230584538193, "learning_rate": 1.9312786270156135e-05, "loss": 0.6972150802612305, "step": 925 }, { "epoch": 0.32634361233480175, "grad_norm": 1.5939077112190465, "learning_rate": 1.9310662088548042e-05, "loss": 0.8735189437866211, "step": 926 }, { "epoch": 0.32669603524229074, "grad_norm": 1.4550040646675775, "learning_rate": 1.930853474625993e-05, "loss": 0.6114254593849182, "step": 927 }, { "epoch": 0.32704845814977973, "grad_norm": 1.5850836788259668, "learning_rate": 1.930640424401396e-05, "loss": 0.8032322525978088, "step": 928 }, { "epoch": 0.3274008810572687, "grad_norm": 1.3666090686487828, "learning_rate": 1.9304270582533376e-05, "loss": 0.7391160726547241, "step": 929 }, { "epoch": 0.3277533039647577, "grad_norm": 1.3744182830455962, "learning_rate": 1.930213376254249e-05, "loss": 0.7055366039276123, "step": 930 }, { "epoch": 0.3281057268722467, "grad_norm": 1.3717314993069374, "learning_rate": 1.9299993784766684e-05, "loss": 0.671670138835907, "step": 931 }, { "epoch": 0.3284581497797357, "grad_norm": 1.4961694507376992, "learning_rate": 1.9297850649932416e-05, "loss": 0.7486976385116577, "step": 932 }, { "epoch": 0.3288105726872247, "grad_norm": 1.3777653583239398, "learning_rate": 1.929570435876721e-05, "loss": 0.8767625093460083, "step": 933 }, { "epoch": 0.3291629955947137, "grad_norm": 1.5767252427705674, "learning_rate": 1.929355491199967e-05, "loss": 0.6841862797737122, "step": 934 }, { "epoch": 0.3295154185022026, "grad_norm": 1.4985001262879563, "learning_rate": 1.929140231035946e-05, "loss": 0.7745054960250854, "step": 935 }, { "epoch": 0.3298678414096916, "grad_norm": 1.4538548583561628, "learning_rate": 1.928924655457733e-05, "loss": 0.5879434943199158, "step": 936 }, { "epoch": 0.3302202643171806, "grad_norm": 1.4292680321712006, "learning_rate": 1.9287087645385084e-05, "loss": 0.8484170436859131, "step": 937 }, { "epoch": 0.3305726872246696, "grad_norm": 1.3383126778675687, "learning_rate": 1.9284925583515604e-05, "loss": 0.6518877148628235, "step": 938 }, { "epoch": 0.3309251101321586, "grad_norm": 1.3496744406534642, "learning_rate": 1.928276036970285e-05, "loss": 0.7694787383079529, "step": 939 }, { "epoch": 0.3312775330396476, "grad_norm": 1.475669634065235, "learning_rate": 1.928059200468184e-05, "loss": 0.6893239617347717, "step": 940 }, { "epoch": 0.33162995594713657, "grad_norm": 1.9386710613485005, "learning_rate": 1.927842048918867e-05, "loss": 0.7731181383132935, "step": 941 }, { "epoch": 0.33198237885462556, "grad_norm": 1.2730945433300995, "learning_rate": 1.9276245823960495e-05, "loss": 0.652579665184021, "step": 942 }, { "epoch": 0.33233480176211455, "grad_norm": 1.4590802585162193, "learning_rate": 1.927406800973555e-05, "loss": 0.7504575252532959, "step": 943 }, { "epoch": 0.33268722466960354, "grad_norm": 1.2636242756085148, "learning_rate": 1.927188704725314e-05, "loss": 0.6199444532394409, "step": 944 }, { "epoch": 0.33303964757709253, "grad_norm": 1.3381297141173314, "learning_rate": 1.9269702937253623e-05, "loss": 0.7452073693275452, "step": 945 }, { "epoch": 0.33339207048458147, "grad_norm": 1.6220831494484687, "learning_rate": 1.926751568047845e-05, "loss": 0.7538012266159058, "step": 946 }, { "epoch": 0.33374449339207046, "grad_norm": 1.298282312930767, "learning_rate": 1.9265325277670114e-05, "loss": 0.6670408248901367, "step": 947 }, { "epoch": 0.33409691629955945, "grad_norm": 1.3861711594873305, "learning_rate": 1.926313172957219e-05, "loss": 0.8060495853424072, "step": 948 }, { "epoch": 0.33444933920704845, "grad_norm": 1.497135036962013, "learning_rate": 1.926093503692933e-05, "loss": 0.7494044303894043, "step": 949 }, { "epoch": 0.33480176211453744, "grad_norm": 1.4954420855155135, "learning_rate": 1.9258735200487235e-05, "loss": 0.5751914978027344, "step": 950 }, { "epoch": 0.33515418502202643, "grad_norm": 1.3135496972020755, "learning_rate": 1.9256532220992683e-05, "loss": 0.7234281301498413, "step": 951 }, { "epoch": 0.3355066079295154, "grad_norm": 1.648299384166419, "learning_rate": 1.9254326099193515e-05, "loss": 0.7721251249313354, "step": 952 }, { "epoch": 0.3358590308370044, "grad_norm": 1.5273494870998061, "learning_rate": 1.925211683583864e-05, "loss": 0.7240835428237915, "step": 953 }, { "epoch": 0.3362114537444934, "grad_norm": 1.5101195617398009, "learning_rate": 1.9249904431678037e-05, "loss": 0.6622776985168457, "step": 954 }, { "epoch": 0.3365638766519824, "grad_norm": 1.7484785330432984, "learning_rate": 1.9247688887462747e-05, "loss": 0.9682766199111938, "step": 955 }, { "epoch": 0.3369162995594714, "grad_norm": 1.5743447413941896, "learning_rate": 1.9245470203944878e-05, "loss": 0.8363134860992432, "step": 956 }, { "epoch": 0.3372687224669604, "grad_norm": 1.4500608043156524, "learning_rate": 1.9243248381877605e-05, "loss": 0.6530857086181641, "step": 957 }, { "epoch": 0.3376211453744493, "grad_norm": 1.2035108561422267, "learning_rate": 1.924102342201517e-05, "loss": 0.5186585187911987, "step": 958 }, { "epoch": 0.3379735682819383, "grad_norm": 1.3827408215949344, "learning_rate": 1.9238795325112867e-05, "loss": 0.6729516983032227, "step": 959 }, { "epoch": 0.3383259911894273, "grad_norm": 4.587971824519282, "learning_rate": 1.9236564091927083e-05, "loss": 0.6991842985153198, "step": 960 }, { "epoch": 0.3386784140969163, "grad_norm": 1.516889979226708, "learning_rate": 1.9234329723215235e-05, "loss": 0.7738245725631714, "step": 961 }, { "epoch": 0.3390308370044053, "grad_norm": 1.4574207335379696, "learning_rate": 1.923209221973583e-05, "loss": 0.7027466893196106, "step": 962 }, { "epoch": 0.3393832599118943, "grad_norm": 1.401098486802875, "learning_rate": 1.922985158224843e-05, "loss": 0.7868508696556091, "step": 963 }, { "epoch": 0.33973568281938327, "grad_norm": 1.3325223534105368, "learning_rate": 1.9227607811513662e-05, "loss": 0.7499512434005737, "step": 964 }, { "epoch": 0.34008810572687226, "grad_norm": 1.3198116129339372, "learning_rate": 1.9225360908293217e-05, "loss": 0.6662228107452393, "step": 965 }, { "epoch": 0.34044052863436125, "grad_norm": 1.4854956624988247, "learning_rate": 1.9223110873349847e-05, "loss": 0.8570939302444458, "step": 966 }, { "epoch": 0.34079295154185024, "grad_norm": 1.3385040645698225, "learning_rate": 1.9220857707447372e-05, "loss": 0.7497669458389282, "step": 967 }, { "epoch": 0.34114537444933923, "grad_norm": 1.2753268813313299, "learning_rate": 1.9218601411350663e-05, "loss": 0.7356737852096558, "step": 968 }, { "epoch": 0.34149779735682817, "grad_norm": 2.3286924006274896, "learning_rate": 1.9216341985825672e-05, "loss": 0.7880491018295288, "step": 969 }, { "epoch": 0.34185022026431716, "grad_norm": 1.4677269303314853, "learning_rate": 1.92140794316394e-05, "loss": 0.734922468662262, "step": 970 }, { "epoch": 0.34220264317180615, "grad_norm": 1.5501144518696521, "learning_rate": 1.9211813749559916e-05, "loss": 0.6710363626480103, "step": 971 }, { "epoch": 0.34255506607929515, "grad_norm": 1.256856073477316, "learning_rate": 1.920954494035634e-05, "loss": 0.7300584316253662, "step": 972 }, { "epoch": 0.34290748898678414, "grad_norm": 1.5351698758546528, "learning_rate": 1.9207273004798873e-05, "loss": 0.8584152460098267, "step": 973 }, { "epoch": 0.34325991189427313, "grad_norm": 1.4115351274616093, "learning_rate": 1.9204997943658764e-05, "loss": 0.7307419776916504, "step": 974 }, { "epoch": 0.3436123348017621, "grad_norm": 1.434441373942747, "learning_rate": 1.920271975770832e-05, "loss": 0.6004960536956787, "step": 975 }, { "epoch": 0.3439647577092511, "grad_norm": 1.4015679334218965, "learning_rate": 1.920043844772092e-05, "loss": 0.7951763868331909, "step": 976 }, { "epoch": 0.3443171806167401, "grad_norm": 1.6575061104845086, "learning_rate": 1.919815401447099e-05, "loss": 0.6835082769393921, "step": 977 }, { "epoch": 0.3446696035242291, "grad_norm": 1.3790503468449504, "learning_rate": 1.9195866458734034e-05, "loss": 0.7556526064872742, "step": 978 }, { "epoch": 0.3450220264317181, "grad_norm": 1.2642410643718298, "learning_rate": 1.91935757812866e-05, "loss": 0.6918114423751831, "step": 979 }, { "epoch": 0.345374449339207, "grad_norm": 1.6331863015088222, "learning_rate": 1.9191281982906304e-05, "loss": 0.8197037577629089, "step": 980 }, { "epoch": 0.345726872246696, "grad_norm": 1.9820857497043596, "learning_rate": 1.9188985064371818e-05, "loss": 0.833138644695282, "step": 981 }, { "epoch": 0.346079295154185, "grad_norm": 1.3877421520016489, "learning_rate": 1.9186685026462874e-05, "loss": 0.6593397855758667, "step": 982 }, { "epoch": 0.346431718061674, "grad_norm": 1.750501089720619, "learning_rate": 1.918438186996026e-05, "loss": 0.7535643577575684, "step": 983 }, { "epoch": 0.346784140969163, "grad_norm": 1.5295833510904033, "learning_rate": 1.9182075595645836e-05, "loss": 0.6959745287895203, "step": 984 }, { "epoch": 0.347136563876652, "grad_norm": 1.4045200992789866, "learning_rate": 1.91797662043025e-05, "loss": 0.7349518537521362, "step": 985 }, { "epoch": 0.347488986784141, "grad_norm": 1.3769518272852244, "learning_rate": 1.9177453696714224e-05, "loss": 0.7677974700927734, "step": 986 }, { "epoch": 0.34784140969162997, "grad_norm": 1.4486626509256493, "learning_rate": 1.917513807366603e-05, "loss": 0.7302255630493164, "step": 987 }, { "epoch": 0.34819383259911896, "grad_norm": 1.477891236612788, "learning_rate": 1.9172819335944003e-05, "loss": 0.838138222694397, "step": 988 }, { "epoch": 0.34854625550660795, "grad_norm": 1.555345260078333, "learning_rate": 1.9170497484335276e-05, "loss": 0.8018180131912231, "step": 989 }, { "epoch": 0.34889867841409694, "grad_norm": 1.4299439839627417, "learning_rate": 1.9168172519628056e-05, "loss": 0.8085787296295166, "step": 990 }, { "epoch": 0.3492511013215859, "grad_norm": 1.407734167007011, "learning_rate": 1.9165844442611584e-05, "loss": 0.8419004082679749, "step": 991 }, { "epoch": 0.34960352422907487, "grad_norm": 1.485093259368171, "learning_rate": 1.916351325407618e-05, "loss": 0.8255139589309692, "step": 992 }, { "epoch": 0.34995594713656386, "grad_norm": 1.3581016847128187, "learning_rate": 1.9161178954813203e-05, "loss": 0.7588528990745544, "step": 993 }, { "epoch": 0.35030837004405285, "grad_norm": 1.3722258517458088, "learning_rate": 1.9158841545615076e-05, "loss": 0.7057096362113953, "step": 994 }, { "epoch": 0.35066079295154184, "grad_norm": 1.3264479954648483, "learning_rate": 1.915650102727528e-05, "loss": 0.6913125514984131, "step": 995 }, { "epoch": 0.35101321585903084, "grad_norm": 1.4277288783882767, "learning_rate": 1.9154157400588348e-05, "loss": 0.7622898817062378, "step": 996 }, { "epoch": 0.3513656387665198, "grad_norm": 1.3345359637809249, "learning_rate": 1.915181066634986e-05, "loss": 0.6918702125549316, "step": 997 }, { "epoch": 0.3517180616740088, "grad_norm": 1.4330955991310976, "learning_rate": 1.914946082535647e-05, "loss": 0.8801462650299072, "step": 998 }, { "epoch": 0.3520704845814978, "grad_norm": 1.6364104196010791, "learning_rate": 1.9147107878405873e-05, "loss": 0.7901172637939453, "step": 999 }, { "epoch": 0.3524229074889868, "grad_norm": 1.3202428944557627, "learning_rate": 1.9144751826296818e-05, "loss": 0.7308447360992432, "step": 1000 }, { "epoch": 0.3527753303964758, "grad_norm": 1.3152547105893029, "learning_rate": 1.9142392669829114e-05, "loss": 0.5733275413513184, "step": 1001 }, { "epoch": 0.35312775330396473, "grad_norm": 1.4327185784306546, "learning_rate": 1.9140030409803622e-05, "loss": 0.7251306772232056, "step": 1002 }, { "epoch": 0.3534801762114537, "grad_norm": 1.3492122584167072, "learning_rate": 1.913766504702225e-05, "loss": 0.7983027696609497, "step": 1003 }, { "epoch": 0.3538325991189427, "grad_norm": 1.5284478719025472, "learning_rate": 1.9135296582287973e-05, "loss": 0.7464017868041992, "step": 1004 }, { "epoch": 0.3541850220264317, "grad_norm": 1.3377291300677683, "learning_rate": 1.9132925016404805e-05, "loss": 0.7333002686500549, "step": 1005 }, { "epoch": 0.3545374449339207, "grad_norm": 1.4170618275882645, "learning_rate": 1.9130550350177823e-05, "loss": 0.729085385799408, "step": 1006 }, { "epoch": 0.3548898678414097, "grad_norm": 1.1531700234964573, "learning_rate": 1.9128172584413148e-05, "loss": 0.7599227428436279, "step": 1007 }, { "epoch": 0.3552422907488987, "grad_norm": 1.3499603875621307, "learning_rate": 1.9125791719917962e-05, "loss": 0.8110464811325073, "step": 1008 }, { "epoch": 0.3555947136563877, "grad_norm": 1.443391069493257, "learning_rate": 1.912340775750049e-05, "loss": 0.7431697845458984, "step": 1009 }, { "epoch": 0.35594713656387666, "grad_norm": 1.3353700802371913, "learning_rate": 1.9121020697970016e-05, "loss": 0.7833640575408936, "step": 1010 }, { "epoch": 0.35629955947136566, "grad_norm": 1.2927496434698726, "learning_rate": 1.9118630542136874e-05, "loss": 0.7693058252334595, "step": 1011 }, { "epoch": 0.35665198237885465, "grad_norm": 1.3593779388270224, "learning_rate": 1.9116237290812445e-05, "loss": 0.7724676132202148, "step": 1012 }, { "epoch": 0.3570044052863436, "grad_norm": 1.3849928303091037, "learning_rate": 1.911384094480916e-05, "loss": 0.6024055480957031, "step": 1013 }, { "epoch": 0.3573568281938326, "grad_norm": 1.254237630036734, "learning_rate": 1.9111441504940514e-05, "loss": 0.7710703611373901, "step": 1014 }, { "epoch": 0.35770925110132157, "grad_norm": 1.3917926832468532, "learning_rate": 1.910903897202103e-05, "loss": 0.7591651678085327, "step": 1015 }, { "epoch": 0.35806167400881056, "grad_norm": 1.3250641662724636, "learning_rate": 1.9106633346866302e-05, "loss": 0.7721874713897705, "step": 1016 }, { "epoch": 0.35841409691629955, "grad_norm": 1.3837097156983347, "learning_rate": 1.910422463029296e-05, "loss": 0.6767420172691345, "step": 1017 }, { "epoch": 0.35876651982378854, "grad_norm": 1.5808312779065312, "learning_rate": 1.910181282311869e-05, "loss": 0.6704902648925781, "step": 1018 }, { "epoch": 0.35911894273127754, "grad_norm": 1.3288966146848866, "learning_rate": 1.9099397926162227e-05, "loss": 0.8871079683303833, "step": 1019 }, { "epoch": 0.3594713656387665, "grad_norm": 1.5716465127646195, "learning_rate": 1.909697994024335e-05, "loss": 0.7222549319267273, "step": 1020 }, { "epoch": 0.3598237885462555, "grad_norm": 1.4050103839828958, "learning_rate": 1.9094558866182892e-05, "loss": 0.7443021535873413, "step": 1021 }, { "epoch": 0.3601762114537445, "grad_norm": 1.3877313570980134, "learning_rate": 1.9092134704802735e-05, "loss": 0.7698349952697754, "step": 1022 }, { "epoch": 0.3605286343612335, "grad_norm": 1.9010750041325926, "learning_rate": 1.9089707456925798e-05, "loss": 0.863248348236084, "step": 1023 }, { "epoch": 0.3608810572687225, "grad_norm": 1.1572981545597187, "learning_rate": 1.9087277123376068e-05, "loss": 0.7036338448524475, "step": 1024 }, { "epoch": 0.36123348017621143, "grad_norm": 1.5140044810060398, "learning_rate": 1.9084843704978558e-05, "loss": 0.7427274584770203, "step": 1025 }, { "epoch": 0.3615859030837004, "grad_norm": 1.5903685422277276, "learning_rate": 1.908240720255934e-05, "loss": 0.6548313498497009, "step": 1026 }, { "epoch": 0.3619383259911894, "grad_norm": 1.3326463394362358, "learning_rate": 1.9079967616945534e-05, "loss": 0.7586454749107361, "step": 1027 }, { "epoch": 0.3622907488986784, "grad_norm": 1.45389698507953, "learning_rate": 1.90775249489653e-05, "loss": 0.6954889297485352, "step": 1028 }, { "epoch": 0.3626431718061674, "grad_norm": 1.6543950271160617, "learning_rate": 1.907507919944785e-05, "loss": 0.8798770904541016, "step": 1029 }, { "epoch": 0.3629955947136564, "grad_norm": 1.3815054682339305, "learning_rate": 1.9072630369223433e-05, "loss": 0.6600694060325623, "step": 1030 }, { "epoch": 0.3633480176211454, "grad_norm": 1.5776995405913148, "learning_rate": 1.9070178459123366e-05, "loss": 0.6830897927284241, "step": 1031 }, { "epoch": 0.36370044052863437, "grad_norm": 1.1973844620945089, "learning_rate": 1.906772346997998e-05, "loss": 0.6283613443374634, "step": 1032 }, { "epoch": 0.36405286343612336, "grad_norm": 1.2892968799675324, "learning_rate": 1.9065265402626676e-05, "loss": 0.6451754570007324, "step": 1033 }, { "epoch": 0.36440528634361236, "grad_norm": 1.4387559441313162, "learning_rate": 1.9062804257897887e-05, "loss": 0.7949883937835693, "step": 1034 }, { "epoch": 0.36475770925110135, "grad_norm": 1.4366893391590683, "learning_rate": 1.90603400366291e-05, "loss": 0.625343918800354, "step": 1035 }, { "epoch": 0.3651101321585903, "grad_norm": 1.5716897663583798, "learning_rate": 1.9057872739656843e-05, "loss": 0.8398839235305786, "step": 1036 }, { "epoch": 0.3654625550660793, "grad_norm": 1.6515297053174456, "learning_rate": 1.9055402367818673e-05, "loss": 0.8628166913986206, "step": 1037 }, { "epoch": 0.36581497797356827, "grad_norm": 1.6000244306696312, "learning_rate": 1.905292892195322e-05, "loss": 0.7494110465049744, "step": 1038 }, { "epoch": 0.36616740088105726, "grad_norm": 1.6358981860019415, "learning_rate": 1.9050452402900134e-05, "loss": 0.7695099115371704, "step": 1039 }, { "epoch": 0.36651982378854625, "grad_norm": 1.3948395289772064, "learning_rate": 1.904797281150012e-05, "loss": 0.8067067861557007, "step": 1040 }, { "epoch": 0.36687224669603524, "grad_norm": 1.5430196098026252, "learning_rate": 1.9045490148594917e-05, "loss": 0.7542074918746948, "step": 1041 }, { "epoch": 0.36722466960352423, "grad_norm": 1.4232871422135234, "learning_rate": 1.9043004415027314e-05, "loss": 0.7027335166931152, "step": 1042 }, { "epoch": 0.3675770925110132, "grad_norm": 1.2842638834648272, "learning_rate": 1.9040515611641142e-05, "loss": 0.7779253721237183, "step": 1043 }, { "epoch": 0.3679295154185022, "grad_norm": 1.4713589430159515, "learning_rate": 1.9038023739281275e-05, "loss": 0.6840049028396606, "step": 1044 }, { "epoch": 0.3682819383259912, "grad_norm": 1.2252786450532585, "learning_rate": 1.903552879879362e-05, "loss": 0.6183794736862183, "step": 1045 }, { "epoch": 0.3686343612334802, "grad_norm": 1.3239395642180716, "learning_rate": 1.9033030791025127e-05, "loss": 0.7770168781280518, "step": 1046 }, { "epoch": 0.36898678414096914, "grad_norm": 1.5646813675557831, "learning_rate": 1.9030529716823806e-05, "loss": 0.7192036509513855, "step": 1047 }, { "epoch": 0.36933920704845813, "grad_norm": 1.3179369082607764, "learning_rate": 1.9028025577038688e-05, "loss": 0.6604419946670532, "step": 1048 }, { "epoch": 0.3696916299559471, "grad_norm": 1.7088212085954357, "learning_rate": 1.9025518372519847e-05, "loss": 0.7999060153961182, "step": 1049 }, { "epoch": 0.3700440528634361, "grad_norm": 1.6369356635778263, "learning_rate": 1.9023008104118404e-05, "loss": 0.7487536668777466, "step": 1050 }, { "epoch": 0.3703964757709251, "grad_norm": 1.4534592079598474, "learning_rate": 1.9020494772686513e-05, "loss": 0.7786455154418945, "step": 1051 }, { "epoch": 0.3707488986784141, "grad_norm": 1.5556124976221868, "learning_rate": 1.9017978379077378e-05, "loss": 0.7592626214027405, "step": 1052 }, { "epoch": 0.3711013215859031, "grad_norm": 1.3193440168525459, "learning_rate": 1.901545892414523e-05, "loss": 0.774850606918335, "step": 1053 }, { "epoch": 0.3714537444933921, "grad_norm": 1.4859587321900767, "learning_rate": 1.901293640874535e-05, "loss": 0.5430009365081787, "step": 1054 }, { "epoch": 0.37180616740088107, "grad_norm": 1.4541817899150224, "learning_rate": 1.9010410833734053e-05, "loss": 0.7459923624992371, "step": 1055 }, { "epoch": 0.37215859030837006, "grad_norm": 1.6269332982530442, "learning_rate": 1.9007882199968692e-05, "loss": 0.6372017860412598, "step": 1056 }, { "epoch": 0.37251101321585905, "grad_norm": 1.6522112420188226, "learning_rate": 1.900535050830766e-05, "loss": 0.6773583292961121, "step": 1057 }, { "epoch": 0.372863436123348, "grad_norm": 1.7342256392022233, "learning_rate": 1.900281575961039e-05, "loss": 0.8431004285812378, "step": 1058 }, { "epoch": 0.373215859030837, "grad_norm": 1.4085085883480681, "learning_rate": 1.9000277954737342e-05, "loss": 0.6361340284347534, "step": 1059 }, { "epoch": 0.373568281938326, "grad_norm": 1.3793359019510345, "learning_rate": 1.8997737094550033e-05, "loss": 0.7153787612915039, "step": 1060 }, { "epoch": 0.37392070484581497, "grad_norm": 1.4220392348844544, "learning_rate": 1.8995193179911e-05, "loss": 0.7244935631752014, "step": 1061 }, { "epoch": 0.37427312775330396, "grad_norm": 1.4061330426818142, "learning_rate": 1.8992646211683817e-05, "loss": 0.6648202538490295, "step": 1062 }, { "epoch": 0.37462555066079295, "grad_norm": 1.4217807346058315, "learning_rate": 1.8990096190733113e-05, "loss": 0.6528836488723755, "step": 1063 }, { "epoch": 0.37497797356828194, "grad_norm": 1.4695679092519263, "learning_rate": 1.8987543117924532e-05, "loss": 0.6749341487884521, "step": 1064 }, { "epoch": 0.37533039647577093, "grad_norm": 1.3287092803608218, "learning_rate": 1.8984986994124766e-05, "loss": 0.7402256727218628, "step": 1065 }, { "epoch": 0.3756828193832599, "grad_norm": 1.2181513754192281, "learning_rate": 1.898242782020154e-05, "loss": 0.5638695955276489, "step": 1066 }, { "epoch": 0.3760352422907489, "grad_norm": 1.5457056768133204, "learning_rate": 1.897986559702361e-05, "loss": 0.829822838306427, "step": 1067 }, { "epoch": 0.3763876651982379, "grad_norm": 1.3351440834834858, "learning_rate": 1.8977300325460774e-05, "loss": 0.6796025037765503, "step": 1068 }, { "epoch": 0.37674008810572684, "grad_norm": 1.3611135527247238, "learning_rate": 1.897473200638386e-05, "loss": 0.8584038615226746, "step": 1069 }, { "epoch": 0.37709251101321584, "grad_norm": 1.4622377307020165, "learning_rate": 1.897216064066474e-05, "loss": 0.8069149255752563, "step": 1070 }, { "epoch": 0.37744493392070483, "grad_norm": 1.2194173424769332, "learning_rate": 1.89695862291763e-05, "loss": 0.5762223601341248, "step": 1071 }, { "epoch": 0.3777973568281938, "grad_norm": 1.3827918624348656, "learning_rate": 1.8967008772792483e-05, "loss": 0.6626466512680054, "step": 1072 }, { "epoch": 0.3781497797356828, "grad_norm": 1.15359758590964, "learning_rate": 1.896442827238825e-05, "loss": 0.6260244250297546, "step": 1073 }, { "epoch": 0.3785022026431718, "grad_norm": 1.8994686915407593, "learning_rate": 1.8961844728839602e-05, "loss": 0.8090343475341797, "step": 1074 }, { "epoch": 0.3788546255506608, "grad_norm": 1.4116056126096472, "learning_rate": 1.8959258143023575e-05, "loss": 0.66957026720047, "step": 1075 }, { "epoch": 0.3792070484581498, "grad_norm": 1.308974606662818, "learning_rate": 1.8956668515818223e-05, "loss": 0.7103087306022644, "step": 1076 }, { "epoch": 0.3795594713656388, "grad_norm": 1.468914156940793, "learning_rate": 1.895407584810266e-05, "loss": 0.7469112277030945, "step": 1077 }, { "epoch": 0.37991189427312777, "grad_norm": 1.624950928787921, "learning_rate": 1.8951480140757003e-05, "loss": 0.8252213001251221, "step": 1078 }, { "epoch": 0.38026431718061676, "grad_norm": 1.4238044077341658, "learning_rate": 1.8948881394662417e-05, "loss": 0.7204562425613403, "step": 1079 }, { "epoch": 0.38061674008810575, "grad_norm": 1.5659608304591812, "learning_rate": 1.89462796107011e-05, "loss": 0.7325669527053833, "step": 1080 }, { "epoch": 0.3809691629955947, "grad_norm": 1.2964480504204927, "learning_rate": 1.8943674789756276e-05, "loss": 0.738972008228302, "step": 1081 }, { "epoch": 0.3813215859030837, "grad_norm": 1.5892566433984823, "learning_rate": 1.8941066932712194e-05, "loss": 0.7468631267547607, "step": 1082 }, { "epoch": 0.3816740088105727, "grad_norm": 1.6145182365902104, "learning_rate": 1.893845604045415e-05, "loss": 0.6479831337928772, "step": 1083 }, { "epoch": 0.38202643171806167, "grad_norm": 1.3615750017210906, "learning_rate": 1.893584211386845e-05, "loss": 0.7615871429443359, "step": 1084 }, { "epoch": 0.38237885462555066, "grad_norm": 1.8901071385329251, "learning_rate": 1.8933225153842446e-05, "loss": 0.6934449076652527, "step": 1085 }, { "epoch": 0.38273127753303965, "grad_norm": 1.2384833194245852, "learning_rate": 1.8930605161264517e-05, "loss": 0.5267079472541809, "step": 1086 }, { "epoch": 0.38308370044052864, "grad_norm": 1.524832028509735, "learning_rate": 1.892798213702407e-05, "loss": 0.7309125661849976, "step": 1087 }, { "epoch": 0.38343612334801763, "grad_norm": 1.3743253361073855, "learning_rate": 1.892535608201153e-05, "loss": 0.8133678436279297, "step": 1088 }, { "epoch": 0.3837885462555066, "grad_norm": 1.3915725940468886, "learning_rate": 1.892272699711837e-05, "loss": 0.6097027063369751, "step": 1089 }, { "epoch": 0.3841409691629956, "grad_norm": 1.548287022579551, "learning_rate": 1.8920094883237082e-05, "loss": 0.70456862449646, "step": 1090 }, { "epoch": 0.3844933920704846, "grad_norm": 1.2952569165029428, "learning_rate": 1.8917459741261183e-05, "loss": 0.7236523628234863, "step": 1091 }, { "epoch": 0.38484581497797354, "grad_norm": 1.5039785189114319, "learning_rate": 1.8914821572085224e-05, "loss": 0.7251272201538086, "step": 1092 }, { "epoch": 0.38519823788546254, "grad_norm": 1.271767676796452, "learning_rate": 1.8912180376604777e-05, "loss": 0.7381070852279663, "step": 1093 }, { "epoch": 0.3855506607929515, "grad_norm": 1.6023999081974447, "learning_rate": 1.8909536155716458e-05, "loss": 0.6654129028320312, "step": 1094 }, { "epoch": 0.3859030837004405, "grad_norm": 1.4351957388528893, "learning_rate": 1.8906888910317883e-05, "loss": 0.7823128700256348, "step": 1095 }, { "epoch": 0.3862555066079295, "grad_norm": 1.2302320218391962, "learning_rate": 1.8904238641307718e-05, "loss": 0.5988126993179321, "step": 1096 }, { "epoch": 0.3866079295154185, "grad_norm": 1.6745614533481283, "learning_rate": 1.8901585349585643e-05, "loss": 0.7671465873718262, "step": 1097 }, { "epoch": 0.3869603524229075, "grad_norm": 1.4027982600434907, "learning_rate": 1.889892903605237e-05, "loss": 0.7878838777542114, "step": 1098 }, { "epoch": 0.3873127753303965, "grad_norm": 1.2802181437962392, "learning_rate": 1.8896269701609634e-05, "loss": 0.72254878282547, "step": 1099 }, { "epoch": 0.3876651982378855, "grad_norm": 1.4183908379879375, "learning_rate": 1.8893607347160198e-05, "loss": 0.6796868443489075, "step": 1100 }, { "epoch": 0.38801762114537447, "grad_norm": 1.510469064523606, "learning_rate": 1.8890941973607843e-05, "loss": 0.6378471851348877, "step": 1101 }, { "epoch": 0.38837004405286346, "grad_norm": 1.327169163711753, "learning_rate": 1.888827358185739e-05, "loss": 0.8473032712936401, "step": 1102 }, { "epoch": 0.3887224669603524, "grad_norm": 1.4704779902492213, "learning_rate": 1.8885602172814667e-05, "loss": 0.8272742033004761, "step": 1103 }, { "epoch": 0.3890748898678414, "grad_norm": 1.45593169268278, "learning_rate": 1.8882927747386533e-05, "loss": 0.7244507670402527, "step": 1104 }, { "epoch": 0.3894273127753304, "grad_norm": 1.3081271484466186, "learning_rate": 1.888025030648088e-05, "loss": 0.5764014720916748, "step": 1105 }, { "epoch": 0.3897797356828194, "grad_norm": 1.230279760550168, "learning_rate": 1.887756985100661e-05, "loss": 0.6944009065628052, "step": 1106 }, { "epoch": 0.39013215859030836, "grad_norm": 1.381963017332696, "learning_rate": 1.8874886381873657e-05, "loss": 0.7096902132034302, "step": 1107 }, { "epoch": 0.39048458149779736, "grad_norm": 1.6526795986169043, "learning_rate": 1.8872199899992973e-05, "loss": 0.6304805278778076, "step": 1108 }, { "epoch": 0.39083700440528635, "grad_norm": 1.3081643743142675, "learning_rate": 1.8869510406276538e-05, "loss": 0.7091327905654907, "step": 1109 }, { "epoch": 0.39118942731277534, "grad_norm": 1.4257979117717376, "learning_rate": 1.886681790163735e-05, "loss": 0.6575565338134766, "step": 1110 }, { "epoch": 0.39154185022026433, "grad_norm": 1.6155582257297172, "learning_rate": 1.8864122386989426e-05, "loss": 0.837468147277832, "step": 1111 }, { "epoch": 0.3918942731277533, "grad_norm": 1.4395330206284223, "learning_rate": 1.8861423863247816e-05, "loss": 0.6861380338668823, "step": 1112 }, { "epoch": 0.3922466960352423, "grad_norm": 1.3206140573248442, "learning_rate": 1.8858722331328582e-05, "loss": 0.7421156167984009, "step": 1113 }, { "epoch": 0.39259911894273125, "grad_norm": 1.4106532753820455, "learning_rate": 1.8856017792148807e-05, "loss": 0.8037575483322144, "step": 1114 }, { "epoch": 0.39295154185022024, "grad_norm": 1.34412494732323, "learning_rate": 1.8853310246626608e-05, "loss": 0.6530179381370544, "step": 1115 }, { "epoch": 0.39330396475770923, "grad_norm": 1.7480111733406445, "learning_rate": 1.88505996956811e-05, "loss": 0.9039478302001953, "step": 1116 }, { "epoch": 0.3936563876651982, "grad_norm": 1.2556675250098766, "learning_rate": 1.8847886140232438e-05, "loss": 0.7734917998313904, "step": 1117 }, { "epoch": 0.3940088105726872, "grad_norm": 1.4809117769611548, "learning_rate": 1.8845169581201786e-05, "loss": 0.7146204113960266, "step": 1118 }, { "epoch": 0.3943612334801762, "grad_norm": 1.4108388267740644, "learning_rate": 1.8842450019511337e-05, "loss": 0.6427414417266846, "step": 1119 }, { "epoch": 0.3947136563876652, "grad_norm": 1.462443026711516, "learning_rate": 1.883972745608429e-05, "loss": 0.7241504192352295, "step": 1120 }, { "epoch": 0.3950660792951542, "grad_norm": 1.5796197427651677, "learning_rate": 1.8837001891844875e-05, "loss": 0.7085466384887695, "step": 1121 }, { "epoch": 0.3954185022026432, "grad_norm": 1.220037664049328, "learning_rate": 1.8834273327718334e-05, "loss": 0.6099711656570435, "step": 1122 }, { "epoch": 0.3957709251101322, "grad_norm": 1.7637467057266936, "learning_rate": 1.8831541764630936e-05, "loss": 0.9153809547424316, "step": 1123 }, { "epoch": 0.39612334801762117, "grad_norm": 1.432058114739846, "learning_rate": 1.8828807203509953e-05, "loss": 0.7025514841079712, "step": 1124 }, { "epoch": 0.3964757709251101, "grad_norm": 1.3170228531933665, "learning_rate": 1.882606964528369e-05, "loss": 0.8254855275154114, "step": 1125 }, { "epoch": 0.3968281938325991, "grad_norm": 1.3015643549096694, "learning_rate": 1.8823329090881457e-05, "loss": 0.6812278032302856, "step": 1126 }, { "epoch": 0.3971806167400881, "grad_norm": 1.4379402990614556, "learning_rate": 1.8820585541233592e-05, "loss": 0.6570114493370056, "step": 1127 }, { "epoch": 0.3975330396475771, "grad_norm": 1.4245448514304093, "learning_rate": 1.881783899727144e-05, "loss": 0.636163592338562, "step": 1128 }, { "epoch": 0.39788546255506607, "grad_norm": 1.4535684365173425, "learning_rate": 1.8815089459927373e-05, "loss": 0.6744807958602905, "step": 1129 }, { "epoch": 0.39823788546255506, "grad_norm": 1.2654983836452696, "learning_rate": 1.8812336930134768e-05, "loss": 0.6739502549171448, "step": 1130 }, { "epoch": 0.39859030837004406, "grad_norm": 1.5274150360278067, "learning_rate": 1.8809581408828026e-05, "loss": 0.800058126449585, "step": 1131 }, { "epoch": 0.39894273127753305, "grad_norm": 1.293199138820765, "learning_rate": 1.880682289694256e-05, "loss": 0.7158734798431396, "step": 1132 }, { "epoch": 0.39929515418502204, "grad_norm": 1.426620948967722, "learning_rate": 1.8804061395414795e-05, "loss": 0.7142150402069092, "step": 1133 }, { "epoch": 0.39964757709251103, "grad_norm": 1.5712220679274596, "learning_rate": 1.8801296905182184e-05, "loss": 0.7830438613891602, "step": 1134 }, { "epoch": 0.4, "grad_norm": 1.3789411964854812, "learning_rate": 1.879852942718318e-05, "loss": 0.7037091255187988, "step": 1135 }, { "epoch": 0.400352422907489, "grad_norm": 1.5410576826642701, "learning_rate": 1.8795758962357254e-05, "loss": 0.7634316682815552, "step": 1136 }, { "epoch": 0.40070484581497795, "grad_norm": 1.3380525485574057, "learning_rate": 1.8792985511644895e-05, "loss": 0.8569636344909668, "step": 1137 }, { "epoch": 0.40105726872246694, "grad_norm": 1.4697640342217926, "learning_rate": 1.8790209075987603e-05, "loss": 0.8589881062507629, "step": 1138 }, { "epoch": 0.40140969162995593, "grad_norm": 1.4119711578026037, "learning_rate": 1.8787429656327892e-05, "loss": 0.6667177677154541, "step": 1139 }, { "epoch": 0.4017621145374449, "grad_norm": 1.5302691962759787, "learning_rate": 1.8784647253609286e-05, "loss": 0.8272922039031982, "step": 1140 }, { "epoch": 0.4021145374449339, "grad_norm": 1.4934073596410382, "learning_rate": 1.8781861868776328e-05, "loss": 0.735906720161438, "step": 1141 }, { "epoch": 0.4024669603524229, "grad_norm": 1.6214826290901958, "learning_rate": 1.8779073502774567e-05, "loss": 0.7496200799942017, "step": 1142 }, { "epoch": 0.4028193832599119, "grad_norm": 1.5379634398249482, "learning_rate": 1.8776282156550563e-05, "loss": 0.741244912147522, "step": 1143 }, { "epoch": 0.4031718061674009, "grad_norm": 1.6175484470841388, "learning_rate": 1.87734878310519e-05, "loss": 0.6074572205543518, "step": 1144 }, { "epoch": 0.4035242290748899, "grad_norm": 1.5403137415943102, "learning_rate": 1.8770690527227156e-05, "loss": 0.7852963805198669, "step": 1145 }, { "epoch": 0.4038766519823789, "grad_norm": 1.3167947695811832, "learning_rate": 1.8767890246025934e-05, "loss": 0.8041664361953735, "step": 1146 }, { "epoch": 0.40422907488986787, "grad_norm": 1.2847896666293108, "learning_rate": 1.876508698839884e-05, "loss": 0.6014564037322998, "step": 1147 }, { "epoch": 0.4045814977973568, "grad_norm": 1.6737775020761936, "learning_rate": 1.876228075529749e-05, "loss": 0.7389206886291504, "step": 1148 }, { "epoch": 0.4049339207048458, "grad_norm": 1.5291026740622409, "learning_rate": 1.875947154767452e-05, "loss": 0.7540062665939331, "step": 1149 }, { "epoch": 0.4052863436123348, "grad_norm": 1.5780731113626183, "learning_rate": 1.8756659366483564e-05, "loss": 0.6953487396240234, "step": 1150 }, { "epoch": 0.4056387665198238, "grad_norm": 1.8069469411894516, "learning_rate": 1.875384421267927e-05, "loss": 0.6715666055679321, "step": 1151 }, { "epoch": 0.40599118942731277, "grad_norm": 1.6113428960633331, "learning_rate": 1.8751026087217294e-05, "loss": 0.7763206362724304, "step": 1152 }, { "epoch": 0.40634361233480176, "grad_norm": 1.7227531605547286, "learning_rate": 1.8748204991054304e-05, "loss": 0.8445626497268677, "step": 1153 }, { "epoch": 0.40669603524229075, "grad_norm": 1.4170830085508515, "learning_rate": 1.8745380925147976e-05, "loss": 0.6789584159851074, "step": 1154 }, { "epoch": 0.40704845814977975, "grad_norm": 1.403092590323935, "learning_rate": 1.8742553890456986e-05, "loss": 0.6301349401473999, "step": 1155 }, { "epoch": 0.40740088105726874, "grad_norm": 1.243923442253091, "learning_rate": 1.873972388794103e-05, "loss": 0.5968909859657288, "step": 1156 }, { "epoch": 0.40775330396475773, "grad_norm": 1.489269903668207, "learning_rate": 1.873689091856081e-05, "loss": 0.759127676486969, "step": 1157 }, { "epoch": 0.4081057268722467, "grad_norm": 1.7062525426103168, "learning_rate": 1.873405498327802e-05, "loss": 0.8113895654678345, "step": 1158 }, { "epoch": 0.40845814977973566, "grad_norm": 2.2841166697739266, "learning_rate": 1.8731216083055373e-05, "loss": 0.6294944286346436, "step": 1159 }, { "epoch": 0.40881057268722465, "grad_norm": 1.7643300465666825, "learning_rate": 1.87283742188566e-05, "loss": 0.7024469375610352, "step": 1160 }, { "epoch": 0.40916299559471364, "grad_norm": 1.6192702903054457, "learning_rate": 1.8725529391646413e-05, "loss": 0.6593793034553528, "step": 1161 }, { "epoch": 0.40951541850220263, "grad_norm": 1.491465083071803, "learning_rate": 1.8722681602390548e-05, "loss": 0.72177654504776, "step": 1162 }, { "epoch": 0.4098678414096916, "grad_norm": 1.5089448151062697, "learning_rate": 1.8719830852055736e-05, "loss": 0.7099393606185913, "step": 1163 }, { "epoch": 0.4102202643171806, "grad_norm": 1.3870038981594819, "learning_rate": 1.871697714160972e-05, "loss": 0.6221687197685242, "step": 1164 }, { "epoch": 0.4105726872246696, "grad_norm": 1.6034975452453926, "learning_rate": 1.8714120472021252e-05, "loss": 0.7236911058425903, "step": 1165 }, { "epoch": 0.4109251101321586, "grad_norm": 1.6733335742616042, "learning_rate": 1.8711260844260072e-05, "loss": 0.6777583360671997, "step": 1166 }, { "epoch": 0.4112775330396476, "grad_norm": 1.2685396486773262, "learning_rate": 1.870839825929694e-05, "loss": 0.6408713459968567, "step": 1167 }, { "epoch": 0.4116299559471366, "grad_norm": 1.5501797457897155, "learning_rate": 1.870553271810362e-05, "loss": 0.6081968545913696, "step": 1168 }, { "epoch": 0.4119823788546256, "grad_norm": 1.324315376857478, "learning_rate": 1.8702664221652864e-05, "loss": 0.7269757986068726, "step": 1169 }, { "epoch": 0.4123348017621145, "grad_norm": 1.359571395974998, "learning_rate": 1.8699792770918443e-05, "loss": 0.6563149094581604, "step": 1170 }, { "epoch": 0.4126872246696035, "grad_norm": 1.412304869808958, "learning_rate": 1.8696918366875123e-05, "loss": 0.6900039911270142, "step": 1171 }, { "epoch": 0.4130396475770925, "grad_norm": 1.6666238046463622, "learning_rate": 1.869404101049868e-05, "loss": 0.6575014591217041, "step": 1172 }, { "epoch": 0.4133920704845815, "grad_norm": 1.7453316480937289, "learning_rate": 1.8691160702765878e-05, "loss": 0.8178410530090332, "step": 1173 }, { "epoch": 0.4137444933920705, "grad_norm": 1.2369225358107252, "learning_rate": 1.8688277444654495e-05, "loss": 0.6247331500053406, "step": 1174 }, { "epoch": 0.41409691629955947, "grad_norm": 1.4809443864869283, "learning_rate": 1.868539123714331e-05, "loss": 0.7220792770385742, "step": 1175 }, { "epoch": 0.41444933920704846, "grad_norm": 1.3133478143499064, "learning_rate": 1.8682502081212104e-05, "loss": 0.6279594302177429, "step": 1176 }, { "epoch": 0.41480176211453745, "grad_norm": 1.9965951061666904, "learning_rate": 1.8679609977841646e-05, "loss": 0.8814467787742615, "step": 1177 }, { "epoch": 0.41515418502202645, "grad_norm": 1.337413771448709, "learning_rate": 1.867671492801372e-05, "loss": 0.6601974368095398, "step": 1178 }, { "epoch": 0.41550660792951544, "grad_norm": 1.5188708939818696, "learning_rate": 1.8673816932711107e-05, "loss": 0.7004785537719727, "step": 1179 }, { "epoch": 0.41585903083700443, "grad_norm": 1.5057078901191085, "learning_rate": 1.8670915992917586e-05, "loss": 0.7409330606460571, "step": 1180 }, { "epoch": 0.41621145374449336, "grad_norm": 1.4232223858260633, "learning_rate": 1.8668012109617933e-05, "loss": 0.6698065996170044, "step": 1181 }, { "epoch": 0.41656387665198236, "grad_norm": 1.5925482634189316, "learning_rate": 1.8665105283797927e-05, "loss": 0.7420671582221985, "step": 1182 }, { "epoch": 0.41691629955947135, "grad_norm": 1.5560634478711484, "learning_rate": 1.8662195516444345e-05, "loss": 0.7719774842262268, "step": 1183 }, { "epoch": 0.41726872246696034, "grad_norm": 1.4792437797078573, "learning_rate": 1.8659282808544966e-05, "loss": 0.6206108331680298, "step": 1184 }, { "epoch": 0.41762114537444933, "grad_norm": 1.3470893025550628, "learning_rate": 1.865636716108856e-05, "loss": 0.799741268157959, "step": 1185 }, { "epoch": 0.4179735682819383, "grad_norm": 1.419455186886867, "learning_rate": 1.8653448575064893e-05, "loss": 0.6839771866798401, "step": 1186 }, { "epoch": 0.4183259911894273, "grad_norm": 1.4763673797370565, "learning_rate": 1.8650527051464744e-05, "loss": 0.7937930822372437, "step": 1187 }, { "epoch": 0.4186784140969163, "grad_norm": 2.8190993538517524, "learning_rate": 1.8647602591279873e-05, "loss": 0.6819020509719849, "step": 1188 }, { "epoch": 0.4190308370044053, "grad_norm": 1.3567646132379503, "learning_rate": 1.864467519550305e-05, "loss": 0.75614994764328, "step": 1189 }, { "epoch": 0.4193832599118943, "grad_norm": 1.567742841021855, "learning_rate": 1.864174486512803e-05, "loss": 0.6966177225112915, "step": 1190 }, { "epoch": 0.4197356828193833, "grad_norm": 1.7710714107881367, "learning_rate": 1.8638811601149568e-05, "loss": 0.821509838104248, "step": 1191 }, { "epoch": 0.4200881057268723, "grad_norm": 1.2328562386437087, "learning_rate": 1.8635875404563414e-05, "loss": 0.5905138254165649, "step": 1192 }, { "epoch": 0.4204405286343612, "grad_norm": 1.4647056442197128, "learning_rate": 1.8632936276366323e-05, "loss": 0.6856247186660767, "step": 1193 }, { "epoch": 0.4207929515418502, "grad_norm": 1.4886760353067057, "learning_rate": 1.862999421755603e-05, "loss": 0.745036244392395, "step": 1194 }, { "epoch": 0.4211453744493392, "grad_norm": 1.1750279689329006, "learning_rate": 1.8627049229131276e-05, "loss": 0.6503005027770996, "step": 1195 }, { "epoch": 0.4214977973568282, "grad_norm": 1.5431880343600168, "learning_rate": 1.86241013120918e-05, "loss": 0.7498307228088379, "step": 1196 }, { "epoch": 0.4218502202643172, "grad_norm": 1.3468463845976426, "learning_rate": 1.862115046743831e-05, "loss": 0.7928652763366699, "step": 1197 }, { "epoch": 0.42220264317180617, "grad_norm": 1.2342083264732957, "learning_rate": 1.861819669617254e-05, "loss": 0.6854137182235718, "step": 1198 }, { "epoch": 0.42255506607929516, "grad_norm": 1.2078818370142543, "learning_rate": 1.86152399992972e-05, "loss": 0.6196715831756592, "step": 1199 }, { "epoch": 0.42290748898678415, "grad_norm": 1.3970249114344502, "learning_rate": 1.8612280377816e-05, "loss": 0.6937464475631714, "step": 1200 }, { "epoch": 0.42325991189427314, "grad_norm": 1.68603514212184, "learning_rate": 1.860931783273363e-05, "loss": 0.7681070566177368, "step": 1201 }, { "epoch": 0.42361233480176214, "grad_norm": 1.1472443629032707, "learning_rate": 1.860635236505579e-05, "loss": 0.676302969455719, "step": 1202 }, { "epoch": 0.4239647577092511, "grad_norm": 1.3856112594345633, "learning_rate": 1.8603383975789168e-05, "loss": 0.6533253192901611, "step": 1203 }, { "epoch": 0.42431718061674006, "grad_norm": 1.3469284337535972, "learning_rate": 1.860041266594143e-05, "loss": 0.689995288848877, "step": 1204 }, { "epoch": 0.42466960352422906, "grad_norm": 1.5007772835228577, "learning_rate": 1.859743843652124e-05, "loss": 0.8129922747612, "step": 1205 }, { "epoch": 0.42502202643171805, "grad_norm": 1.5410683437680426, "learning_rate": 1.859446128853827e-05, "loss": 0.8388077616691589, "step": 1206 }, { "epoch": 0.42537444933920704, "grad_norm": 1.5558529097869003, "learning_rate": 1.859148122300316e-05, "loss": 0.8795225024223328, "step": 1207 }, { "epoch": 0.42572687224669603, "grad_norm": 1.1213374735945745, "learning_rate": 1.858849824092755e-05, "loss": 0.7340251803398132, "step": 1208 }, { "epoch": 0.426079295154185, "grad_norm": 1.4951423694810024, "learning_rate": 1.8585512343324073e-05, "loss": 0.8028355240821838, "step": 1209 }, { "epoch": 0.426431718061674, "grad_norm": 1.4585659256901293, "learning_rate": 1.8582523531206345e-05, "loss": 0.8469998836517334, "step": 1210 }, { "epoch": 0.426784140969163, "grad_norm": 1.5383443322846213, "learning_rate": 1.857953180558898e-05, "loss": 0.7562716007232666, "step": 1211 }, { "epoch": 0.427136563876652, "grad_norm": 1.4113837543209433, "learning_rate": 1.857653716748757e-05, "loss": 0.7166177034378052, "step": 1212 }, { "epoch": 0.427488986784141, "grad_norm": 1.5418199345701933, "learning_rate": 1.85735396179187e-05, "loss": 0.6946159601211548, "step": 1213 }, { "epoch": 0.42784140969163, "grad_norm": 1.317478160039542, "learning_rate": 1.8570539157899953e-05, "loss": 0.5341482758522034, "step": 1214 }, { "epoch": 0.4281938325991189, "grad_norm": 1.4287482623115888, "learning_rate": 1.8567535788449886e-05, "loss": 0.8128249645233154, "step": 1215 }, { "epoch": 0.4285462555066079, "grad_norm": 1.34325298688053, "learning_rate": 1.8564529510588046e-05, "loss": 0.7136335372924805, "step": 1216 }, { "epoch": 0.4288986784140969, "grad_norm": 1.358163949395023, "learning_rate": 1.856152032533498e-05, "loss": 0.6737562417984009, "step": 1217 }, { "epoch": 0.4292511013215859, "grad_norm": 1.306172251281951, "learning_rate": 1.855850823371221e-05, "loss": 0.8102772235870361, "step": 1218 }, { "epoch": 0.4296035242290749, "grad_norm": 1.4109010281873726, "learning_rate": 1.855549323674224e-05, "loss": 0.7389130592346191, "step": 1219 }, { "epoch": 0.4299559471365639, "grad_norm": 1.6519920374913426, "learning_rate": 1.8552475335448575e-05, "loss": 0.9127305746078491, "step": 1220 }, { "epoch": 0.43030837004405287, "grad_norm": 1.4401162301668198, "learning_rate": 1.8549454530855697e-05, "loss": 0.7599691152572632, "step": 1221 }, { "epoch": 0.43066079295154186, "grad_norm": 1.59834239528244, "learning_rate": 1.8546430823989075e-05, "loss": 0.8343819379806519, "step": 1222 }, { "epoch": 0.43101321585903085, "grad_norm": 1.7081796080725813, "learning_rate": 1.8543404215875163e-05, "loss": 0.7759256362915039, "step": 1223 }, { "epoch": 0.43136563876651984, "grad_norm": 1.3364188660639875, "learning_rate": 1.8540374707541398e-05, "loss": 0.7803373336791992, "step": 1224 }, { "epoch": 0.43171806167400884, "grad_norm": 1.4538494145578122, "learning_rate": 1.8537342300016208e-05, "loss": 0.6292921304702759, "step": 1225 }, { "epoch": 0.43207048458149777, "grad_norm": 1.4521641959343445, "learning_rate": 1.8534306994329e-05, "loss": 0.8495175838470459, "step": 1226 }, { "epoch": 0.43242290748898676, "grad_norm": 1.3062742481146943, "learning_rate": 1.8531268791510167e-05, "loss": 0.6141406297683716, "step": 1227 }, { "epoch": 0.43277533039647575, "grad_norm": 1.576341879030456, "learning_rate": 1.8528227692591076e-05, "loss": 0.7087793350219727, "step": 1228 }, { "epoch": 0.43312775330396475, "grad_norm": 1.5442094308389636, "learning_rate": 1.8525183698604098e-05, "loss": 0.7919498682022095, "step": 1229 }, { "epoch": 0.43348017621145374, "grad_norm": 1.317139155945084, "learning_rate": 1.8522136810582563e-05, "loss": 0.7408226728439331, "step": 1230 }, { "epoch": 0.43383259911894273, "grad_norm": 1.407715848952146, "learning_rate": 1.85190870295608e-05, "loss": 0.7140083312988281, "step": 1231 }, { "epoch": 0.4341850220264317, "grad_norm": 1.4117801977693214, "learning_rate": 1.8516034356574118e-05, "loss": 0.7211521863937378, "step": 1232 }, { "epoch": 0.4345374449339207, "grad_norm": 1.1753876244240768, "learning_rate": 1.85129787926588e-05, "loss": 0.7103208303451538, "step": 1233 }, { "epoch": 0.4348898678414097, "grad_norm": 1.4479636604064312, "learning_rate": 1.850992033885211e-05, "loss": 0.816985011100769, "step": 1234 }, { "epoch": 0.4352422907488987, "grad_norm": 1.4368000528699751, "learning_rate": 1.850685899619231e-05, "loss": 0.6678498983383179, "step": 1235 }, { "epoch": 0.4355947136563877, "grad_norm": 1.4259303259837681, "learning_rate": 1.8503794765718622e-05, "loss": 0.7895394563674927, "step": 1236 }, { "epoch": 0.4359471365638766, "grad_norm": 1.4256180200365283, "learning_rate": 1.8500727648471258e-05, "loss": 0.7295971512794495, "step": 1237 }, { "epoch": 0.4362995594713656, "grad_norm": 1.552299015894991, "learning_rate": 1.849765764549141e-05, "loss": 0.7216300964355469, "step": 1238 }, { "epoch": 0.4366519823788546, "grad_norm": 2.585430848560662, "learning_rate": 1.8494584757821252e-05, "loss": 0.8088986873626709, "step": 1239 }, { "epoch": 0.4370044052863436, "grad_norm": 1.3100612400703413, "learning_rate": 1.8491508986503928e-05, "loss": 0.7380663156509399, "step": 1240 }, { "epoch": 0.4373568281938326, "grad_norm": 1.6225248085666293, "learning_rate": 1.8488430332583566e-05, "loss": 0.8671622276306152, "step": 1241 }, { "epoch": 0.4377092511013216, "grad_norm": 1.2548349586148027, "learning_rate": 1.8485348797105277e-05, "loss": 0.6649274826049805, "step": 1242 }, { "epoch": 0.4380616740088106, "grad_norm": 1.3492988450242405, "learning_rate": 1.848226438111515e-05, "loss": 0.740972638130188, "step": 1243 }, { "epoch": 0.43841409691629957, "grad_norm": 1.4062352938849376, "learning_rate": 1.8479177085660237e-05, "loss": 0.6593915820121765, "step": 1244 }, { "epoch": 0.43876651982378856, "grad_norm": 1.567811244473075, "learning_rate": 1.8476086911788588e-05, "loss": 0.792604923248291, "step": 1245 }, { "epoch": 0.43911894273127755, "grad_norm": 1.583820790059346, "learning_rate": 1.8472993860549216e-05, "loss": 0.7521885633468628, "step": 1246 }, { "epoch": 0.43947136563876654, "grad_norm": 1.4520072830804587, "learning_rate": 1.846989793299212e-05, "loss": 0.7242246270179749, "step": 1247 }, { "epoch": 0.43982378854625553, "grad_norm": 1.2892821056189339, "learning_rate": 1.846679913016827e-05, "loss": 0.7343394160270691, "step": 1248 }, { "epoch": 0.44017621145374447, "grad_norm": 1.2525729631593605, "learning_rate": 1.846369745312961e-05, "loss": 0.747876763343811, "step": 1249 }, { "epoch": 0.44052863436123346, "grad_norm": 1.428983542355963, "learning_rate": 1.8460592902929064e-05, "loss": 0.7280946969985962, "step": 1250 }, { "epoch": 0.44088105726872245, "grad_norm": 1.4254243168735732, "learning_rate": 1.845748548062053e-05, "loss": 0.7288519144058228, "step": 1251 }, { "epoch": 0.44123348017621145, "grad_norm": 1.4847519735948493, "learning_rate": 1.8454375187258885e-05, "loss": 0.6269914507865906, "step": 1252 }, { "epoch": 0.44158590308370044, "grad_norm": 1.5355271633317282, "learning_rate": 1.8451262023899973e-05, "loss": 0.7848949432373047, "step": 1253 }, { "epoch": 0.44193832599118943, "grad_norm": 1.580356922946946, "learning_rate": 1.8448145991600618e-05, "loss": 0.7306517958641052, "step": 1254 }, { "epoch": 0.4422907488986784, "grad_norm": 1.3971874565683924, "learning_rate": 1.8445027091418614e-05, "loss": 0.6933906078338623, "step": 1255 }, { "epoch": 0.4426431718061674, "grad_norm": 1.2942221540854206, "learning_rate": 1.8441905324412732e-05, "loss": 0.8260579109191895, "step": 1256 }, { "epoch": 0.4429955947136564, "grad_norm": 1.4276139754434451, "learning_rate": 1.8438780691642712e-05, "loss": 0.6818344593048096, "step": 1257 }, { "epoch": 0.4433480176211454, "grad_norm": 1.5571344695334373, "learning_rate": 1.8435653194169274e-05, "loss": 0.5980014801025391, "step": 1258 }, { "epoch": 0.4437004405286344, "grad_norm": 1.6363647319534165, "learning_rate": 1.8432522833054102e-05, "loss": 0.7694655656814575, "step": 1259 }, { "epoch": 0.4440528634361233, "grad_norm": 1.4888452953161495, "learning_rate": 1.842938960935986e-05, "loss": 0.6861646771430969, "step": 1260 }, { "epoch": 0.4444052863436123, "grad_norm": 1.5245731543783476, "learning_rate": 1.8426253524150176e-05, "loss": 0.7346323728561401, "step": 1261 }, { "epoch": 0.4447577092511013, "grad_norm": 1.5555183873270297, "learning_rate": 1.8423114578489657e-05, "loss": 0.7116265296936035, "step": 1262 }, { "epoch": 0.4451101321585903, "grad_norm": 1.3587295641859045, "learning_rate": 1.8419972773443877e-05, "loss": 0.7148594856262207, "step": 1263 }, { "epoch": 0.4454625550660793, "grad_norm": 1.4208610042885819, "learning_rate": 1.8416828110079378e-05, "loss": 0.6629737615585327, "step": 1264 }, { "epoch": 0.4458149779735683, "grad_norm": 1.2215430932959532, "learning_rate": 1.8413680589463677e-05, "loss": 0.5734454393386841, "step": 1265 }, { "epoch": 0.4461674008810573, "grad_norm": 1.4728067026699625, "learning_rate": 1.8410530212665258e-05, "loss": 0.8129212260246277, "step": 1266 }, { "epoch": 0.44651982378854627, "grad_norm": 1.5823039225136746, "learning_rate": 1.8407376980753578e-05, "loss": 0.7408754825592041, "step": 1267 }, { "epoch": 0.44687224669603526, "grad_norm": 2.9520848026313633, "learning_rate": 1.840422089479906e-05, "loss": 0.7315034866333008, "step": 1268 }, { "epoch": 0.44722466960352425, "grad_norm": 1.453693040198655, "learning_rate": 1.8401061955873102e-05, "loss": 0.6774684190750122, "step": 1269 }, { "epoch": 0.44757709251101324, "grad_norm": 1.4189733125983666, "learning_rate": 1.8397900165048055e-05, "loss": 0.6615294814109802, "step": 1270 }, { "epoch": 0.4479295154185022, "grad_norm": 1.465563156151872, "learning_rate": 1.8394735523397258e-05, "loss": 0.6757136583328247, "step": 1271 }, { "epoch": 0.44828193832599117, "grad_norm": 1.3581337883847424, "learning_rate": 1.8391568031995004e-05, "loss": 0.6395466327667236, "step": 1272 }, { "epoch": 0.44863436123348016, "grad_norm": 1.3957918327614203, "learning_rate": 1.8388397691916556e-05, "loss": 0.6436404585838318, "step": 1273 }, { "epoch": 0.44898678414096915, "grad_norm": 1.2217258095016672, "learning_rate": 1.838522450423815e-05, "loss": 0.6280484199523926, "step": 1274 }, { "epoch": 0.44933920704845814, "grad_norm": 1.3831470857016404, "learning_rate": 1.8382048470036983e-05, "loss": 0.7485225200653076, "step": 1275 }, { "epoch": 0.44969162995594714, "grad_norm": 1.5437699808102354, "learning_rate": 1.8378869590391217e-05, "loss": 0.745079517364502, "step": 1276 }, { "epoch": 0.45004405286343613, "grad_norm": 1.5902187054867891, "learning_rate": 1.8375687866379988e-05, "loss": 0.656510591506958, "step": 1277 }, { "epoch": 0.4503964757709251, "grad_norm": 1.542738255105748, "learning_rate": 1.8372503299083392e-05, "loss": 0.7122445106506348, "step": 1278 }, { "epoch": 0.4507488986784141, "grad_norm": 1.5368544285826038, "learning_rate": 1.8369315889582483e-05, "loss": 0.7402621507644653, "step": 1279 }, { "epoch": 0.4511013215859031, "grad_norm": 1.539047411882563, "learning_rate": 1.8366125638959292e-05, "loss": 0.79311203956604, "step": 1280 }, { "epoch": 0.4514537444933921, "grad_norm": 1.5615114889746888, "learning_rate": 1.8362932548296815e-05, "loss": 0.7748456001281738, "step": 1281 }, { "epoch": 0.45180616740088103, "grad_norm": 1.4203050333533118, "learning_rate": 1.8359736618679e-05, "loss": 0.8285728096961975, "step": 1282 }, { "epoch": 0.45215859030837, "grad_norm": 1.5541412727714081, "learning_rate": 1.835653785119076e-05, "loss": 0.7874733209609985, "step": 1283 }, { "epoch": 0.452511013215859, "grad_norm": 1.990742110424804, "learning_rate": 1.8353336246917996e-05, "loss": 0.8984566926956177, "step": 1284 }, { "epoch": 0.452863436123348, "grad_norm": 1.5779572276747513, "learning_rate": 1.8350131806947537e-05, "loss": 0.7730413675308228, "step": 1285 }, { "epoch": 0.453215859030837, "grad_norm": 1.7109096071986905, "learning_rate": 1.8346924532367195e-05, "loss": 0.6064612865447998, "step": 1286 }, { "epoch": 0.453568281938326, "grad_norm": 1.2417304411100711, "learning_rate": 1.8343714424265742e-05, "loss": 0.6946402192115784, "step": 1287 }, { "epoch": 0.453920704845815, "grad_norm": 1.4035686433407615, "learning_rate": 1.8340501483732908e-05, "loss": 0.6131751537322998, "step": 1288 }, { "epoch": 0.454273127753304, "grad_norm": 1.5800587203565855, "learning_rate": 1.833728571185938e-05, "loss": 0.7251182794570923, "step": 1289 }, { "epoch": 0.45462555066079297, "grad_norm": 1.4036983560957499, "learning_rate": 1.8334067109736826e-05, "loss": 0.6548069715499878, "step": 1290 }, { "epoch": 0.45497797356828196, "grad_norm": 1.3998869795024185, "learning_rate": 1.833084567845785e-05, "loss": 0.7416098117828369, "step": 1291 }, { "epoch": 0.45533039647577095, "grad_norm": 2.508404695128388, "learning_rate": 1.8327621419116034e-05, "loss": 0.7320964932441711, "step": 1292 }, { "epoch": 0.4556828193832599, "grad_norm": 1.3052290617356537, "learning_rate": 1.8324394332805913e-05, "loss": 0.5926196575164795, "step": 1293 }, { "epoch": 0.4560352422907489, "grad_norm": 1.5674410721277312, "learning_rate": 1.8321164420622977e-05, "loss": 0.5294085741043091, "step": 1294 }, { "epoch": 0.45638766519823787, "grad_norm": 1.2785938430138426, "learning_rate": 1.8317931683663688e-05, "loss": 0.6332723498344421, "step": 1295 }, { "epoch": 0.45674008810572686, "grad_norm": 1.5962686180302166, "learning_rate": 1.8314696123025456e-05, "loss": 0.8361148834228516, "step": 1296 }, { "epoch": 0.45709251101321585, "grad_norm": 1.4587382180744954, "learning_rate": 1.8311457739806648e-05, "loss": 0.8097354173660278, "step": 1297 }, { "epoch": 0.45744493392070484, "grad_norm": 1.5247898400944095, "learning_rate": 1.8308216535106606e-05, "loss": 0.8619102239608765, "step": 1298 }, { "epoch": 0.45779735682819384, "grad_norm": 1.7222438621078806, "learning_rate": 1.8304972510025607e-05, "loss": 0.8149014711380005, "step": 1299 }, { "epoch": 0.4581497797356828, "grad_norm": 1.4821216839710079, "learning_rate": 1.8301725665664904e-05, "loss": 0.6217210292816162, "step": 1300 }, { "epoch": 0.4585022026431718, "grad_norm": 1.3606031472973286, "learning_rate": 1.8298476003126695e-05, "loss": 0.7496612071990967, "step": 1301 }, { "epoch": 0.4588546255506608, "grad_norm": 1.3221676149271377, "learning_rate": 1.8295223523514144e-05, "loss": 0.743242084980011, "step": 1302 }, { "epoch": 0.4592070484581498, "grad_norm": 1.3745674408132749, "learning_rate": 1.829196822793136e-05, "loss": 0.6425061821937561, "step": 1303 }, { "epoch": 0.4595594713656388, "grad_norm": 1.6216951689157317, "learning_rate": 1.828871011748342e-05, "loss": 0.8274835348129272, "step": 1304 }, { "epoch": 0.45991189427312773, "grad_norm": 1.2722833909738493, "learning_rate": 1.828544919327635e-05, "loss": 0.6403865814208984, "step": 1305 }, { "epoch": 0.4602643171806167, "grad_norm": 1.234115960449283, "learning_rate": 1.828218545641713e-05, "loss": 0.6585257053375244, "step": 1306 }, { "epoch": 0.4606167400881057, "grad_norm": 1.2325421263478973, "learning_rate": 1.82789189080137e-05, "loss": 0.6467862129211426, "step": 1307 }, { "epoch": 0.4609691629955947, "grad_norm": 1.403654297681647, "learning_rate": 1.827564954917495e-05, "loss": 0.8656524419784546, "step": 1308 }, { "epoch": 0.4613215859030837, "grad_norm": 1.449712147167455, "learning_rate": 1.8272377381010726e-05, "loss": 0.7298469543457031, "step": 1309 }, { "epoch": 0.4616740088105727, "grad_norm": 1.575558340533703, "learning_rate": 1.8269102404631826e-05, "loss": 0.7342871427536011, "step": 1310 }, { "epoch": 0.4620264317180617, "grad_norm": 1.4177026442874099, "learning_rate": 1.8265824621150005e-05, "loss": 0.7437269687652588, "step": 1311 }, { "epoch": 0.4623788546255507, "grad_norm": 1.370008690924395, "learning_rate": 1.8262544031677965e-05, "loss": 0.6761496067047119, "step": 1312 }, { "epoch": 0.46273127753303966, "grad_norm": 1.3488719018465838, "learning_rate": 1.825926063732937e-05, "loss": 0.6504565477371216, "step": 1313 }, { "epoch": 0.46308370044052866, "grad_norm": 1.5002490307110308, "learning_rate": 1.8255974439218826e-05, "loss": 0.7058892250061035, "step": 1314 }, { "epoch": 0.46343612334801765, "grad_norm": 1.37061056314256, "learning_rate": 1.8252685438461893e-05, "loss": 0.704500675201416, "step": 1315 }, { "epoch": 0.4637885462555066, "grad_norm": 1.3921050444029468, "learning_rate": 1.824939363617509e-05, "loss": 0.7438445091247559, "step": 1316 }, { "epoch": 0.4641409691629956, "grad_norm": 1.4372002500080507, "learning_rate": 1.8246099033475872e-05, "loss": 0.6610915660858154, "step": 1317 }, { "epoch": 0.46449339207048457, "grad_norm": 1.0745723869419856, "learning_rate": 1.8242801631482666e-05, "loss": 0.5868711471557617, "step": 1318 }, { "epoch": 0.46484581497797356, "grad_norm": 1.192238188456442, "learning_rate": 1.8239501431314828e-05, "loss": 0.7403215765953064, "step": 1319 }, { "epoch": 0.46519823788546255, "grad_norm": 1.2444894883495399, "learning_rate": 1.823619843409268e-05, "loss": 0.6836927533149719, "step": 1320 }, { "epoch": 0.46555066079295154, "grad_norm": 1.4619703465719247, "learning_rate": 1.8232892640937482e-05, "loss": 0.744488537311554, "step": 1321 }, { "epoch": 0.46590308370044053, "grad_norm": 1.6337099192848834, "learning_rate": 1.822958405297145e-05, "loss": 0.8203051090240479, "step": 1322 }, { "epoch": 0.4662555066079295, "grad_norm": 1.184261838198034, "learning_rate": 1.8226272671317747e-05, "loss": 0.6452913284301758, "step": 1323 }, { "epoch": 0.4666079295154185, "grad_norm": 1.6458345614686154, "learning_rate": 1.8222958497100482e-05, "loss": 0.7362639307975769, "step": 1324 }, { "epoch": 0.4669603524229075, "grad_norm": 6.608293048647877, "learning_rate": 1.8219641531444713e-05, "loss": 0.8192600011825562, "step": 1325 }, { "epoch": 0.4673127753303965, "grad_norm": 1.4257376230679313, "learning_rate": 1.8216321775476452e-05, "loss": 0.8391410112380981, "step": 1326 }, { "epoch": 0.46766519823788544, "grad_norm": 1.3133795307817668, "learning_rate": 1.8212999230322648e-05, "loss": 0.8723593354225159, "step": 1327 }, { "epoch": 0.46801762114537443, "grad_norm": 1.4218119484201381, "learning_rate": 1.8209673897111208e-05, "loss": 0.6891233921051025, "step": 1328 }, { "epoch": 0.4683700440528634, "grad_norm": 1.414801660380672, "learning_rate": 1.820634577697097e-05, "loss": 0.6585180759429932, "step": 1329 }, { "epoch": 0.4687224669603524, "grad_norm": 1.503205293925671, "learning_rate": 1.8203014871031732e-05, "loss": 0.9556418657302856, "step": 1330 }, { "epoch": 0.4690748898678414, "grad_norm": 1.491345239113851, "learning_rate": 1.8199681180424234e-05, "loss": 0.803380012512207, "step": 1331 }, { "epoch": 0.4694273127753304, "grad_norm": 1.6217603270172032, "learning_rate": 1.819634470628016e-05, "loss": 0.7090115547180176, "step": 1332 }, { "epoch": 0.4697797356828194, "grad_norm": 1.6705712009535991, "learning_rate": 1.8193005449732134e-05, "loss": 0.6314720511436462, "step": 1333 }, { "epoch": 0.4701321585903084, "grad_norm": 1.4756439095691731, "learning_rate": 1.8189663411913737e-05, "loss": 0.72248375415802, "step": 1334 }, { "epoch": 0.47048458149779737, "grad_norm": 1.2477075880097683, "learning_rate": 1.818631859395948e-05, "loss": 0.6192474961280823, "step": 1335 }, { "epoch": 0.47083700440528636, "grad_norm": 1.4944381119847567, "learning_rate": 1.818297099700483e-05, "loss": 0.6354564428329468, "step": 1336 }, { "epoch": 0.47118942731277536, "grad_norm": 1.3129251382794922, "learning_rate": 1.817962062218619e-05, "loss": 0.7577195167541504, "step": 1337 }, { "epoch": 0.4715418502202643, "grad_norm": 1.5504293722974503, "learning_rate": 1.8176267470640908e-05, "loss": 0.8064994812011719, "step": 1338 }, { "epoch": 0.4718942731277533, "grad_norm": 1.211182925950848, "learning_rate": 1.8172911543507276e-05, "loss": 0.5994154214859009, "step": 1339 }, { "epoch": 0.4722466960352423, "grad_norm": 1.701641381957404, "learning_rate": 1.8169552841924524e-05, "loss": 0.7483634948730469, "step": 1340 }, { "epoch": 0.47259911894273127, "grad_norm": 1.5346948984560977, "learning_rate": 1.8166191367032828e-05, "loss": 0.817699134349823, "step": 1341 }, { "epoch": 0.47295154185022026, "grad_norm": 1.4634504483386954, "learning_rate": 1.8162827119973305e-05, "loss": 0.7262923717498779, "step": 1342 }, { "epoch": 0.47330396475770925, "grad_norm": 1.6796646988667925, "learning_rate": 1.8159460101888013e-05, "loss": 0.6097851991653442, "step": 1343 }, { "epoch": 0.47365638766519824, "grad_norm": 1.3148094915971675, "learning_rate": 1.8156090313919944e-05, "loss": 0.7258971929550171, "step": 1344 }, { "epoch": 0.47400881057268723, "grad_norm": 1.198607169385478, "learning_rate": 1.8152717757213045e-05, "loss": 0.6300361156463623, "step": 1345 }, { "epoch": 0.4743612334801762, "grad_norm": 1.397827708634256, "learning_rate": 1.8149342432912184e-05, "loss": 0.7339942455291748, "step": 1346 }, { "epoch": 0.4747136563876652, "grad_norm": 1.4524082687419129, "learning_rate": 1.8145964342163188e-05, "loss": 0.7520095109939575, "step": 1347 }, { "epoch": 0.4750660792951542, "grad_norm": 1.6587168399408485, "learning_rate": 1.814258348611281e-05, "loss": 0.7276853322982788, "step": 1348 }, { "epoch": 0.47541850220264315, "grad_norm": 1.4463166573664321, "learning_rate": 1.8139199865908742e-05, "loss": 0.8004029989242554, "step": 1349 }, { "epoch": 0.47577092511013214, "grad_norm": 1.4508723815154267, "learning_rate": 1.8135813482699623e-05, "loss": 0.6932536363601685, "step": 1350 }, { "epoch": 0.47612334801762113, "grad_norm": 1.8868515127553653, "learning_rate": 1.8132424337635026e-05, "loss": 0.7697082161903381, "step": 1351 }, { "epoch": 0.4764757709251101, "grad_norm": 1.246718000700102, "learning_rate": 1.8129032431865453e-05, "loss": 0.6472513675689697, "step": 1352 }, { "epoch": 0.4768281938325991, "grad_norm": 1.413046013449196, "learning_rate": 1.8125637766542353e-05, "loss": 0.6483110785484314, "step": 1353 }, { "epoch": 0.4771806167400881, "grad_norm": 1.4854860856809686, "learning_rate": 1.8122240342818113e-05, "loss": 0.5495485067367554, "step": 1354 }, { "epoch": 0.4775330396475771, "grad_norm": 1.2801602602197804, "learning_rate": 1.811884016184605e-05, "loss": 0.5235577821731567, "step": 1355 }, { "epoch": 0.4778854625550661, "grad_norm": 1.734412256759482, "learning_rate": 1.811543722478042e-05, "loss": 0.7852121591567993, "step": 1356 }, { "epoch": 0.4782378854625551, "grad_norm": 1.3650060645350073, "learning_rate": 1.811203153277641e-05, "loss": 0.6704862713813782, "step": 1357 }, { "epoch": 0.47859030837004407, "grad_norm": 1.6553040991032588, "learning_rate": 1.8108623086990156e-05, "loss": 0.5964453220367432, "step": 1358 }, { "epoch": 0.47894273127753306, "grad_norm": 1.3936312619950861, "learning_rate": 1.8105211888578708e-05, "loss": 0.6697995066642761, "step": 1359 }, { "epoch": 0.479295154185022, "grad_norm": 1.5031130965144783, "learning_rate": 1.810179793870007e-05, "loss": 0.6335821151733398, "step": 1360 }, { "epoch": 0.479647577092511, "grad_norm": 1.5635708705560234, "learning_rate": 1.8098381238513173e-05, "loss": 0.7925145626068115, "step": 1361 }, { "epoch": 0.48, "grad_norm": 1.3011545804458011, "learning_rate": 1.809496178917787e-05, "loss": 0.6567563414573669, "step": 1362 }, { "epoch": 0.480352422907489, "grad_norm": 1.6816341182204335, "learning_rate": 1.809153959185497e-05, "loss": 0.6318811178207397, "step": 1363 }, { "epoch": 0.48070484581497797, "grad_norm": 1.6781349693525882, "learning_rate": 1.8088114647706195e-05, "loss": 0.7309727668762207, "step": 1364 }, { "epoch": 0.48105726872246696, "grad_norm": 1.689289351270497, "learning_rate": 1.8084686957894207e-05, "loss": 0.7109836339950562, "step": 1365 }, { "epoch": 0.48140969162995595, "grad_norm": 1.5638040238741844, "learning_rate": 1.8081256523582604e-05, "loss": 0.7475707530975342, "step": 1366 }, { "epoch": 0.48176211453744494, "grad_norm": 1.492251829838995, "learning_rate": 1.8077823345935904e-05, "loss": 0.7149914503097534, "step": 1367 }, { "epoch": 0.48211453744493393, "grad_norm": 1.5575297411632822, "learning_rate": 1.8074387426119574e-05, "loss": 0.7294478416442871, "step": 1368 }, { "epoch": 0.4824669603524229, "grad_norm": 1.4689289799329066, "learning_rate": 1.8070948765299995e-05, "loss": 0.7115635871887207, "step": 1369 }, { "epoch": 0.4828193832599119, "grad_norm": 1.5506146763507274, "learning_rate": 1.806750736464449e-05, "loss": 0.7046270966529846, "step": 1370 }, { "epoch": 0.4831718061674009, "grad_norm": 1.42427078791196, "learning_rate": 1.8064063225321305e-05, "loss": 0.6206589937210083, "step": 1371 }, { "epoch": 0.48352422907488984, "grad_norm": 1.2993997688945442, "learning_rate": 1.8060616348499612e-05, "loss": 0.7135940194129944, "step": 1372 }, { "epoch": 0.48387665198237884, "grad_norm": 1.4643276104475023, "learning_rate": 1.8057166735349533e-05, "loss": 0.8360849618911743, "step": 1373 }, { "epoch": 0.4842290748898678, "grad_norm": 1.541707883618089, "learning_rate": 1.805371438704209e-05, "loss": 0.6842360496520996, "step": 1374 }, { "epoch": 0.4845814977973568, "grad_norm": 1.5840103891509227, "learning_rate": 1.8050259304749254e-05, "loss": 0.7615031003952026, "step": 1375 }, { "epoch": 0.4849339207048458, "grad_norm": 1.655426139564667, "learning_rate": 1.804680148964392e-05, "loss": 0.8019323348999023, "step": 1376 }, { "epoch": 0.4852863436123348, "grad_norm": 1.4674909380859245, "learning_rate": 1.8043340942899906e-05, "loss": 0.7882958054542542, "step": 1377 }, { "epoch": 0.4856387665198238, "grad_norm": 1.2708485815687132, "learning_rate": 1.8039877665691955e-05, "loss": 0.7504314184188843, "step": 1378 }, { "epoch": 0.4859911894273128, "grad_norm": 1.4258268452315883, "learning_rate": 1.803641165919575e-05, "loss": 0.6634547710418701, "step": 1379 }, { "epoch": 0.4863436123348018, "grad_norm": 1.5654330696713128, "learning_rate": 1.803294292458789e-05, "loss": 0.7744965553283691, "step": 1380 }, { "epoch": 0.48669603524229077, "grad_norm": 1.4537322254817193, "learning_rate": 1.8029471463045904e-05, "loss": 0.6322098970413208, "step": 1381 }, { "epoch": 0.48704845814977976, "grad_norm": 1.4410033770501562, "learning_rate": 1.8025997275748237e-05, "loss": 0.7675940990447998, "step": 1382 }, { "epoch": 0.4874008810572687, "grad_norm": 1.4138967124963124, "learning_rate": 1.8022520363874275e-05, "loss": 0.805001974105835, "step": 1383 }, { "epoch": 0.4877533039647577, "grad_norm": 1.4447058519334661, "learning_rate": 1.8019040728604322e-05, "loss": 0.7647902369499207, "step": 1384 }, { "epoch": 0.4881057268722467, "grad_norm": 1.4676048165311881, "learning_rate": 1.8015558371119604e-05, "loss": 0.7267208099365234, "step": 1385 }, { "epoch": 0.4884581497797357, "grad_norm": 1.698344162431053, "learning_rate": 1.801207329260227e-05, "loss": 0.9259899854660034, "step": 1386 }, { "epoch": 0.48881057268722466, "grad_norm": 1.6007666753359713, "learning_rate": 1.8008585494235398e-05, "loss": 0.7127895951271057, "step": 1387 }, { "epoch": 0.48916299559471366, "grad_norm": 1.3612199688450533, "learning_rate": 1.8005094977202987e-05, "loss": 0.5890867710113525, "step": 1388 }, { "epoch": 0.48951541850220265, "grad_norm": 1.355680060820382, "learning_rate": 1.800160174268996e-05, "loss": 0.9388052225112915, "step": 1389 }, { "epoch": 0.48986784140969164, "grad_norm": 1.3938222391852138, "learning_rate": 1.799810579188216e-05, "loss": 0.7282747626304626, "step": 1390 }, { "epoch": 0.49022026431718063, "grad_norm": 1.3481077360000804, "learning_rate": 1.7994607125966354e-05, "loss": 0.743558943271637, "step": 1391 }, { "epoch": 0.4905726872246696, "grad_norm": 1.5830453320245632, "learning_rate": 1.7991105746130234e-05, "loss": 0.794719934463501, "step": 1392 }, { "epoch": 0.4909251101321586, "grad_norm": 1.2758935421604947, "learning_rate": 1.7987601653562402e-05, "loss": 0.7320685982704163, "step": 1393 }, { "epoch": 0.49127753303964755, "grad_norm": 1.7642547814838838, "learning_rate": 1.798409484945239e-05, "loss": 0.7376105785369873, "step": 1394 }, { "epoch": 0.49162995594713654, "grad_norm": 1.2029848235346605, "learning_rate": 1.7980585334990652e-05, "loss": 0.7474706172943115, "step": 1395 }, { "epoch": 0.49198237885462553, "grad_norm": 1.2018884579546327, "learning_rate": 1.797707311136856e-05, "loss": 0.5799805521965027, "step": 1396 }, { "epoch": 0.4923348017621145, "grad_norm": 1.4260726798049534, "learning_rate": 1.79735581797784e-05, "loss": 0.7515959739685059, "step": 1397 }, { "epoch": 0.4926872246696035, "grad_norm": 1.4843732287701579, "learning_rate": 1.797004054141339e-05, "loss": 0.6035799980163574, "step": 1398 }, { "epoch": 0.4930396475770925, "grad_norm": 1.4699634461145672, "learning_rate": 1.796652019746765e-05, "loss": 0.7613668441772461, "step": 1399 }, { "epoch": 0.4933920704845815, "grad_norm": 1.5395256627563776, "learning_rate": 1.7962997149136226e-05, "loss": 0.8780882954597473, "step": 1400 }, { "epoch": 0.4937444933920705, "grad_norm": 1.4849311758521768, "learning_rate": 1.795947139761509e-05, "loss": 0.8661091327667236, "step": 1401 }, { "epoch": 0.4940969162995595, "grad_norm": 1.2531714361223334, "learning_rate": 1.7955942944101124e-05, "loss": 0.6893571019172668, "step": 1402 }, { "epoch": 0.4944493392070485, "grad_norm": 1.4079915487364913, "learning_rate": 1.7952411789792125e-05, "loss": 0.787032961845398, "step": 1403 }, { "epoch": 0.49480176211453747, "grad_norm": 1.3474472991478739, "learning_rate": 1.7948877935886812e-05, "loss": 0.5346347689628601, "step": 1404 }, { "epoch": 0.4951541850220264, "grad_norm": 1.5512557601329955, "learning_rate": 1.7945341383584818e-05, "loss": 0.8090060949325562, "step": 1405 }, { "epoch": 0.4955066079295154, "grad_norm": 1.4268796756971738, "learning_rate": 1.7941802134086695e-05, "loss": 0.6321496963500977, "step": 1406 }, { "epoch": 0.4958590308370044, "grad_norm": 1.4602718850691796, "learning_rate": 1.7938260188593903e-05, "loss": 0.6405632495880127, "step": 1407 }, { "epoch": 0.4962114537444934, "grad_norm": 1.3838752085896924, "learning_rate": 1.7934715548308825e-05, "loss": 0.7665356397628784, "step": 1408 }, { "epoch": 0.49656387665198237, "grad_norm": 1.6983169415711221, "learning_rate": 1.7931168214434757e-05, "loss": 0.7960416078567505, "step": 1409 }, { "epoch": 0.49691629955947136, "grad_norm": 1.3842449461014021, "learning_rate": 1.7927618188175908e-05, "loss": 0.8080639839172363, "step": 1410 }, { "epoch": 0.49726872246696036, "grad_norm": 1.3034648934851016, "learning_rate": 1.79240654707374e-05, "loss": 0.6503266096115112, "step": 1411 }, { "epoch": 0.49762114537444935, "grad_norm": 1.3378534420648176, "learning_rate": 1.792051006332527e-05, "loss": 0.6063007116317749, "step": 1412 }, { "epoch": 0.49797356828193834, "grad_norm": 1.849150255820523, "learning_rate": 1.791695196714647e-05, "loss": 0.6861660480499268, "step": 1413 }, { "epoch": 0.49832599118942733, "grad_norm": 1.2217791382902905, "learning_rate": 1.791339118340886e-05, "loss": 0.7064980268478394, "step": 1414 }, { "epoch": 0.4986784140969163, "grad_norm": 1.4370359000865323, "learning_rate": 1.7909827713321214e-05, "loss": 0.6102496981620789, "step": 1415 }, { "epoch": 0.49903083700440526, "grad_norm": 1.446734818664789, "learning_rate": 1.790626155809323e-05, "loss": 0.7460618019104004, "step": 1416 }, { "epoch": 0.49938325991189425, "grad_norm": 1.2988677548719765, "learning_rate": 1.7902692718935496e-05, "loss": 0.7124448418617249, "step": 1417 }, { "epoch": 0.49973568281938324, "grad_norm": 1.322744101240627, "learning_rate": 1.7899121197059525e-05, "loss": 0.7194923162460327, "step": 1418 }, { "epoch": 0.5000881057268722, "grad_norm": 1.4429377947794157, "learning_rate": 1.7895546993677736e-05, "loss": 0.6633901596069336, "step": 1419 }, { "epoch": 0.5004405286343613, "grad_norm": 1.5531583469807302, "learning_rate": 1.7891970110003463e-05, "loss": 0.8554216623306274, "step": 1420 }, { "epoch": 0.5007929515418502, "grad_norm": 1.4541421669927512, "learning_rate": 1.7888390547250944e-05, "loss": 0.7259502410888672, "step": 1421 }, { "epoch": 0.5011453744493392, "grad_norm": 1.4299229413313208, "learning_rate": 1.788480830663533e-05, "loss": 0.7330816984176636, "step": 1422 }, { "epoch": 0.5014977973568282, "grad_norm": 1.5727227347094554, "learning_rate": 1.7881223389372678e-05, "loss": 0.7793391346931458, "step": 1423 }, { "epoch": 0.5018502202643171, "grad_norm": 1.5101282054621992, "learning_rate": 1.787763579667996e-05, "loss": 0.7387483716011047, "step": 1424 }, { "epoch": 0.5022026431718062, "grad_norm": 1.2875272836020812, "learning_rate": 1.787404552977505e-05, "loss": 0.6665850877761841, "step": 1425 }, { "epoch": 0.5025550660792951, "grad_norm": 1.6443234538305773, "learning_rate": 1.7870452589876733e-05, "loss": 0.7487791180610657, "step": 1426 }, { "epoch": 0.5029074889867842, "grad_norm": 1.5494170755115177, "learning_rate": 1.78668569782047e-05, "loss": 0.6048247814178467, "step": 1427 }, { "epoch": 0.5032599118942731, "grad_norm": 1.2664597501734751, "learning_rate": 1.786325869597955e-05, "loss": 0.7196261882781982, "step": 1428 }, { "epoch": 0.5036123348017622, "grad_norm": 1.8296774166979555, "learning_rate": 1.785965774442278e-05, "loss": 0.6845135688781738, "step": 1429 }, { "epoch": 0.5039647577092511, "grad_norm": 1.4157663102240734, "learning_rate": 1.785605412475681e-05, "loss": 0.7314398288726807, "step": 1430 }, { "epoch": 0.5043171806167401, "grad_norm": 1.4666969447710358, "learning_rate": 1.7852447838204957e-05, "loss": 0.7171268463134766, "step": 1431 }, { "epoch": 0.5046696035242291, "grad_norm": 1.33657009662446, "learning_rate": 1.784883888599144e-05, "loss": 0.8349916338920593, "step": 1432 }, { "epoch": 0.505022026431718, "grad_norm": 1.3501942895276628, "learning_rate": 1.7845227269341387e-05, "loss": 0.6375530958175659, "step": 1433 }, { "epoch": 0.505374449339207, "grad_norm": 1.42707048545369, "learning_rate": 1.7841612989480824e-05, "loss": 0.8156824707984924, "step": 1434 }, { "epoch": 0.505726872246696, "grad_norm": 1.4408580248696123, "learning_rate": 1.7837996047636696e-05, "loss": 0.7186283469200134, "step": 1435 }, { "epoch": 0.506079295154185, "grad_norm": 1.3439268630529597, "learning_rate": 1.7834376445036834e-05, "loss": 0.6130756139755249, "step": 1436 }, { "epoch": 0.506431718061674, "grad_norm": 1.5285659623162418, "learning_rate": 1.7830754182909985e-05, "loss": 0.6948508024215698, "step": 1437 }, { "epoch": 0.506784140969163, "grad_norm": 1.3759453423428971, "learning_rate": 1.7827129262485793e-05, "loss": 0.7049688100814819, "step": 1438 }, { "epoch": 0.507136563876652, "grad_norm": 1.457151343686531, "learning_rate": 1.7823501684994805e-05, "loss": 0.7491527795791626, "step": 1439 }, { "epoch": 0.507488986784141, "grad_norm": 1.6101324796455516, "learning_rate": 1.781987145166847e-05, "loss": 0.8718780279159546, "step": 1440 }, { "epoch": 0.5078414096916299, "grad_norm": 1.2572878912363772, "learning_rate": 1.7816238563739144e-05, "loss": 0.5675592422485352, "step": 1441 }, { "epoch": 0.508193832599119, "grad_norm": 1.4044509323540495, "learning_rate": 1.7812603022440076e-05, "loss": 0.7472085952758789, "step": 1442 }, { "epoch": 0.5085462555066079, "grad_norm": 1.3371129648202849, "learning_rate": 1.7808964829005416e-05, "loss": 0.7673810720443726, "step": 1443 }, { "epoch": 0.5088986784140969, "grad_norm": 1.5506550607349072, "learning_rate": 1.7805323984670224e-05, "loss": 0.8245630264282227, "step": 1444 }, { "epoch": 0.5092511013215859, "grad_norm": 1.488734758513416, "learning_rate": 1.780168049067045e-05, "loss": 0.8578429222106934, "step": 1445 }, { "epoch": 0.5096035242290748, "grad_norm": 1.3892444083620181, "learning_rate": 1.7798034348242944e-05, "loss": 0.6631708145141602, "step": 1446 }, { "epoch": 0.5099559471365639, "grad_norm": 1.3121030116229568, "learning_rate": 1.779438555862546e-05, "loss": 0.8106615543365479, "step": 1447 }, { "epoch": 0.5103083700440528, "grad_norm": 1.1486822439059632, "learning_rate": 1.7790734123056654e-05, "loss": 0.7033256888389587, "step": 1448 }, { "epoch": 0.5106607929515419, "grad_norm": 1.2259259255559172, "learning_rate": 1.7787080042776065e-05, "loss": 0.7124278545379639, "step": 1449 }, { "epoch": 0.5110132158590308, "grad_norm": 1.4546377837760451, "learning_rate": 1.7783423319024144e-05, "loss": 0.7834827899932861, "step": 1450 }, { "epoch": 0.5113656387665199, "grad_norm": 1.4580618513432573, "learning_rate": 1.777976395304224e-05, "loss": 0.6762892603874207, "step": 1451 }, { "epoch": 0.5117180616740088, "grad_norm": 1.4220157860300873, "learning_rate": 1.7776101946072586e-05, "loss": 0.7317261695861816, "step": 1452 }, { "epoch": 0.5120704845814978, "grad_norm": 1.3265767127223091, "learning_rate": 1.7772437299358324e-05, "loss": 0.6278417110443115, "step": 1453 }, { "epoch": 0.5124229074889868, "grad_norm": 1.319373459720871, "learning_rate": 1.7768770014143485e-05, "loss": 0.6638025045394897, "step": 1454 }, { "epoch": 0.5127753303964758, "grad_norm": 1.400198364176684, "learning_rate": 1.7765100091673e-05, "loss": 0.786564826965332, "step": 1455 }, { "epoch": 0.5131277533039648, "grad_norm": 1.4579007616104753, "learning_rate": 1.776142753319269e-05, "loss": 0.7483570575714111, "step": 1456 }, { "epoch": 0.5134801762114537, "grad_norm": 1.5755517235246568, "learning_rate": 1.7757752339949284e-05, "loss": 0.7036221027374268, "step": 1457 }, { "epoch": 0.5138325991189427, "grad_norm": 1.4840022330643747, "learning_rate": 1.7754074513190384e-05, "loss": 0.6903718709945679, "step": 1458 }, { "epoch": 0.5141850220264317, "grad_norm": 1.2882226376562813, "learning_rate": 1.77503940541645e-05, "loss": 0.7728221416473389, "step": 1459 }, { "epoch": 0.5145374449339207, "grad_norm": 1.327669814898394, "learning_rate": 1.774671096412104e-05, "loss": 0.7127183675765991, "step": 1460 }, { "epoch": 0.5148898678414097, "grad_norm": 1.6330052955229915, "learning_rate": 1.7743025244310293e-05, "loss": 0.7801295518875122, "step": 1461 }, { "epoch": 0.5152422907488987, "grad_norm": 1.1623220195345323, "learning_rate": 1.773933689598345e-05, "loss": 0.632892906665802, "step": 1462 }, { "epoch": 0.5155947136563876, "grad_norm": 1.2497961025206838, "learning_rate": 1.7735645920392587e-05, "loss": 0.7347458600997925, "step": 1463 }, { "epoch": 0.5159471365638767, "grad_norm": 1.5115996209276181, "learning_rate": 1.7731952318790673e-05, "loss": 0.6705365777015686, "step": 1464 }, { "epoch": 0.5162995594713656, "grad_norm": 1.4475904564128834, "learning_rate": 1.7728256092431577e-05, "loss": 0.696006715297699, "step": 1465 }, { "epoch": 0.5166519823788547, "grad_norm": 1.3978951424570836, "learning_rate": 1.7724557242570045e-05, "loss": 0.5922254323959351, "step": 1466 }, { "epoch": 0.5170044052863436, "grad_norm": 1.2709448074189098, "learning_rate": 1.7720855770461733e-05, "loss": 0.6162985563278198, "step": 1467 }, { "epoch": 0.5173568281938326, "grad_norm": 1.432801158502027, "learning_rate": 1.7717151677363164e-05, "loss": 0.7319275140762329, "step": 1468 }, { "epoch": 0.5177092511013216, "grad_norm": 1.3993642551309304, "learning_rate": 1.771344496453177e-05, "loss": 0.7349969148635864, "step": 1469 }, { "epoch": 0.5180616740088105, "grad_norm": 1.287285806622758, "learning_rate": 1.7709735633225863e-05, "loss": 0.8153162598609924, "step": 1470 }, { "epoch": 0.5184140969162996, "grad_norm": 1.3919169087311665, "learning_rate": 1.7706023684704642e-05, "loss": 0.6582974195480347, "step": 1471 }, { "epoch": 0.5187665198237885, "grad_norm": 1.704010679983685, "learning_rate": 1.77023091202282e-05, "loss": 0.696917712688446, "step": 1472 }, { "epoch": 0.5191189427312776, "grad_norm": 1.4310702415713368, "learning_rate": 1.769859194105752e-05, "loss": 0.6281285285949707, "step": 1473 }, { "epoch": 0.5194713656387665, "grad_norm": 1.6327610148964462, "learning_rate": 1.7694872148454463e-05, "loss": 0.7687089443206787, "step": 1474 }, { "epoch": 0.5198237885462555, "grad_norm": 1.386403275153257, "learning_rate": 1.7691149743681783e-05, "loss": 0.6928491592407227, "step": 1475 }, { "epoch": 0.5201762114537445, "grad_norm": 1.3972840676283895, "learning_rate": 1.7687424728003126e-05, "loss": 0.63843834400177, "step": 1476 }, { "epoch": 0.5205286343612335, "grad_norm": 1.7893361899671325, "learning_rate": 1.7683697102683012e-05, "loss": 0.8987904787063599, "step": 1477 }, { "epoch": 0.5208810572687225, "grad_norm": 1.3513150269139367, "learning_rate": 1.767996686898686e-05, "loss": 0.7027539014816284, "step": 1478 }, { "epoch": 0.5212334801762114, "grad_norm": 1.5924485741299983, "learning_rate": 1.7676234028180964e-05, "loss": 0.8490183353424072, "step": 1479 }, { "epoch": 0.5215859030837005, "grad_norm": 1.251712414046886, "learning_rate": 1.7672498581532508e-05, "loss": 0.5885729789733887, "step": 1480 }, { "epoch": 0.5219383259911894, "grad_norm": 1.289415742432068, "learning_rate": 1.766876053030956e-05, "loss": 0.627627968788147, "step": 1481 }, { "epoch": 0.5222907488986784, "grad_norm": 1.2948972408498374, "learning_rate": 1.766501987578108e-05, "loss": 0.6441413164138794, "step": 1482 }, { "epoch": 0.5226431718061674, "grad_norm": 1.3508329997529829, "learning_rate": 1.7661276619216888e-05, "loss": 0.6199722290039062, "step": 1483 }, { "epoch": 0.5229955947136564, "grad_norm": 1.2931208995237342, "learning_rate": 1.7657530761887715e-05, "loss": 0.6364887952804565, "step": 1484 }, { "epoch": 0.5233480176211454, "grad_norm": 1.281527242811407, "learning_rate": 1.7653782305065158e-05, "loss": 0.7279890775680542, "step": 1485 }, { "epoch": 0.5237004405286344, "grad_norm": 1.5228486275670003, "learning_rate": 1.7650031250021704e-05, "loss": 0.6552719473838806, "step": 1486 }, { "epoch": 0.5240528634361233, "grad_norm": 1.4461703633182712, "learning_rate": 1.7646277598030717e-05, "loss": 0.6778907775878906, "step": 1487 }, { "epoch": 0.5244052863436124, "grad_norm": 1.3941119820637071, "learning_rate": 1.7642521350366447e-05, "loss": 0.6581870317459106, "step": 1488 }, { "epoch": 0.5247577092511013, "grad_norm": 1.6198722329881745, "learning_rate": 1.7638762508304025e-05, "loss": 0.8529388904571533, "step": 1489 }, { "epoch": 0.5251101321585903, "grad_norm": 1.659639253256808, "learning_rate": 1.7635001073119458e-05, "loss": 0.6601512432098389, "step": 1490 }, { "epoch": 0.5254625550660793, "grad_norm": 1.5073764890239725, "learning_rate": 1.7631237046089637e-05, "loss": 0.6347510814666748, "step": 1491 }, { "epoch": 0.5258149779735682, "grad_norm": 1.3256786239827452, "learning_rate": 1.762747042849233e-05, "loss": 0.700560986995697, "step": 1492 }, { "epoch": 0.5261674008810573, "grad_norm": 1.4060287838972836, "learning_rate": 1.762370122160619e-05, "loss": 0.5641219019889832, "step": 1493 }, { "epoch": 0.5265198237885462, "grad_norm": 1.3124518756577959, "learning_rate": 1.761992942671074e-05, "loss": 0.8017370700836182, "step": 1494 }, { "epoch": 0.5268722466960353, "grad_norm": 1.334442798992846, "learning_rate": 1.7616155045086394e-05, "loss": 0.6345353126525879, "step": 1495 }, { "epoch": 0.5272246696035242, "grad_norm": 1.6841165394853315, "learning_rate": 1.7612378078014432e-05, "loss": 0.7118426561355591, "step": 1496 }, { "epoch": 0.5275770925110133, "grad_norm": 1.6748084277774182, "learning_rate": 1.7608598526777017e-05, "loss": 0.6186550855636597, "step": 1497 }, { "epoch": 0.5279295154185022, "grad_norm": 1.4676540893387768, "learning_rate": 1.7604816392657195e-05, "loss": 0.8351110219955444, "step": 1498 }, { "epoch": 0.5282819383259912, "grad_norm": 1.3183866002309903, "learning_rate": 1.7601031676938877e-05, "loss": 0.638684868812561, "step": 1499 }, { "epoch": 0.5286343612334802, "grad_norm": 1.291067085285626, "learning_rate": 1.7597244380906856e-05, "loss": 0.5118356943130493, "step": 1500 }, { "epoch": 0.5289867841409691, "grad_norm": 1.2880504132355877, "learning_rate": 1.7593454505846807e-05, "loss": 0.637636125087738, "step": 1501 }, { "epoch": 0.5293392070484582, "grad_norm": 1.3905967147162603, "learning_rate": 1.7589662053045264e-05, "loss": 0.8412563800811768, "step": 1502 }, { "epoch": 0.5296916299559471, "grad_norm": 1.6432072453017084, "learning_rate": 1.758586702378966e-05, "loss": 0.7940464019775391, "step": 1503 }, { "epoch": 0.5300440528634361, "grad_norm": 1.4898667206132308, "learning_rate": 1.7582069419368276e-05, "loss": 0.7136783599853516, "step": 1504 }, { "epoch": 0.5303964757709251, "grad_norm": 1.5677232979916986, "learning_rate": 1.757826924107029e-05, "loss": 0.7212727069854736, "step": 1505 }, { "epoch": 0.5307488986784141, "grad_norm": 2.968905268892082, "learning_rate": 1.757446649018574e-05, "loss": 0.7026032209396362, "step": 1506 }, { "epoch": 0.5311013215859031, "grad_norm": 1.3050484980835664, "learning_rate": 1.7570661168005544e-05, "loss": 0.541954755783081, "step": 1507 }, { "epoch": 0.5314537444933921, "grad_norm": 1.3422366313712581, "learning_rate": 1.7566853275821488e-05, "loss": 0.6927075386047363, "step": 1508 }, { "epoch": 0.531806167400881, "grad_norm": 1.4069640098530838, "learning_rate": 1.7563042814926237e-05, "loss": 0.6556441783905029, "step": 1509 }, { "epoch": 0.5321585903083701, "grad_norm": 1.710258111864569, "learning_rate": 1.7559229786613317e-05, "loss": 0.6895149946212769, "step": 1510 }, { "epoch": 0.532511013215859, "grad_norm": 1.3737730722509855, "learning_rate": 1.755541419217714e-05, "loss": 0.6178139448165894, "step": 1511 }, { "epoch": 0.532863436123348, "grad_norm": 1.5162737493672722, "learning_rate": 1.7551596032912975e-05, "loss": 0.7645368576049805, "step": 1512 }, { "epoch": 0.533215859030837, "grad_norm": 1.3652252290806937, "learning_rate": 1.7547775310116973e-05, "loss": 0.8247367143630981, "step": 1513 }, { "epoch": 0.533568281938326, "grad_norm": 1.2941657542151124, "learning_rate": 1.7543952025086147e-05, "loss": 0.535837709903717, "step": 1514 }, { "epoch": 0.533920704845815, "grad_norm": 1.3232982615818571, "learning_rate": 1.7540126179118387e-05, "loss": 0.51450514793396, "step": 1515 }, { "epoch": 0.5342731277533039, "grad_norm": 1.3863880461090508, "learning_rate": 1.7536297773512444e-05, "loss": 0.7962276935577393, "step": 1516 }, { "epoch": 0.534625550660793, "grad_norm": 1.4799750544282257, "learning_rate": 1.753246680956795e-05, "loss": 0.7586444616317749, "step": 1517 }, { "epoch": 0.5349779735682819, "grad_norm": 1.4967875396536634, "learning_rate": 1.752863328858539e-05, "loss": 0.7074990272521973, "step": 1518 }, { "epoch": 0.535330396475771, "grad_norm": 1.3158842776684478, "learning_rate": 1.7524797211866126e-05, "loss": 0.7409921884536743, "step": 1519 }, { "epoch": 0.5356828193832599, "grad_norm": 1.3752676962962187, "learning_rate": 1.7520958580712394e-05, "loss": 0.7889251708984375, "step": 1520 }, { "epoch": 0.536035242290749, "grad_norm": 2.0871001645404776, "learning_rate": 1.751711739642728e-05, "loss": 0.8244975209236145, "step": 1521 }, { "epoch": 0.5363876651982379, "grad_norm": 1.441127888748836, "learning_rate": 1.7513273660314753e-05, "loss": 0.6573888659477234, "step": 1522 }, { "epoch": 0.5367400881057268, "grad_norm": 1.3793459185222714, "learning_rate": 1.7509427373679643e-05, "loss": 0.71816086769104, "step": 1523 }, { "epoch": 0.5370925110132159, "grad_norm": 1.5200593368820163, "learning_rate": 1.750557853782764e-05, "loss": 0.7681000232696533, "step": 1524 }, { "epoch": 0.5374449339207048, "grad_norm": 1.443442982592023, "learning_rate": 1.7501727154065304e-05, "loss": 0.6777461767196655, "step": 1525 }, { "epoch": 0.5377973568281939, "grad_norm": 1.5538840121485165, "learning_rate": 1.7497873223700063e-05, "loss": 0.714499831199646, "step": 1526 }, { "epoch": 0.5381497797356828, "grad_norm": 1.6085901588908644, "learning_rate": 1.7494016748040206e-05, "loss": 0.6587036848068237, "step": 1527 }, { "epoch": 0.5385022026431718, "grad_norm": 1.5748960305246453, "learning_rate": 1.7490157728394887e-05, "loss": 0.7256105542182922, "step": 1528 }, { "epoch": 0.5388546255506608, "grad_norm": 1.7818844853131433, "learning_rate": 1.7486296166074116e-05, "loss": 0.6551185846328735, "step": 1529 }, { "epoch": 0.5392070484581498, "grad_norm": 1.5961201900224617, "learning_rate": 1.7482432062388782e-05, "loss": 0.710479736328125, "step": 1530 }, { "epoch": 0.5395594713656388, "grad_norm": 1.3063302832130508, "learning_rate": 1.7478565418650625e-05, "loss": 0.7882527709007263, "step": 1531 }, { "epoch": 0.5399118942731278, "grad_norm": 1.4227949027781848, "learning_rate": 1.7474696236172247e-05, "loss": 0.6163671612739563, "step": 1532 }, { "epoch": 0.5402643171806167, "grad_norm": 1.3516530648193832, "learning_rate": 1.7470824516267125e-05, "loss": 0.6923140287399292, "step": 1533 }, { "epoch": 0.5406167400881057, "grad_norm": 1.259724627030556, "learning_rate": 1.7466950260249573e-05, "loss": 0.6473938226699829, "step": 1534 }, { "epoch": 0.5409691629955947, "grad_norm": 1.7187178399062975, "learning_rate": 1.7463073469434792e-05, "loss": 0.631247878074646, "step": 1535 }, { "epoch": 0.5413215859030837, "grad_norm": 1.8932273669088504, "learning_rate": 1.745919414513883e-05, "loss": 0.8113377690315247, "step": 1536 }, { "epoch": 0.5416740088105727, "grad_norm": 1.4958838672098407, "learning_rate": 1.7455312288678588e-05, "loss": 0.7950010299682617, "step": 1537 }, { "epoch": 0.5420264317180616, "grad_norm": 1.5066443226404773, "learning_rate": 1.7451427901371843e-05, "loss": 0.7279125452041626, "step": 1538 }, { "epoch": 0.5423788546255507, "grad_norm": 1.304013044677209, "learning_rate": 1.7447540984537225e-05, "loss": 0.6746084690093994, "step": 1539 }, { "epoch": 0.5427312775330396, "grad_norm": 1.2714028280363416, "learning_rate": 1.744365153949422e-05, "loss": 0.5818569660186768, "step": 1540 }, { "epoch": 0.5430837004405287, "grad_norm": 1.3192138998270364, "learning_rate": 1.743975956756317e-05, "loss": 0.7408111095428467, "step": 1541 }, { "epoch": 0.5434361233480176, "grad_norm": 1.4913068245941434, "learning_rate": 1.7435865070065282e-05, "loss": 0.6842402815818787, "step": 1542 }, { "epoch": 0.5437885462555067, "grad_norm": 1.3316441616353596, "learning_rate": 1.7431968048322617e-05, "loss": 0.6179615259170532, "step": 1543 }, { "epoch": 0.5441409691629956, "grad_norm": 1.3347896582759051, "learning_rate": 1.7428068503658085e-05, "loss": 0.5943140983581543, "step": 1544 }, { "epoch": 0.5444933920704845, "grad_norm": 1.797026236227536, "learning_rate": 1.742416643739547e-05, "loss": 0.7901419401168823, "step": 1545 }, { "epoch": 0.5448458149779736, "grad_norm": 1.4636864955433957, "learning_rate": 1.74202618508594e-05, "loss": 0.7630521655082703, "step": 1546 }, { "epoch": 0.5451982378854625, "grad_norm": 1.5322711938826543, "learning_rate": 1.7416354745375355e-05, "loss": 0.7662566900253296, "step": 1547 }, { "epoch": 0.5455506607929516, "grad_norm": 1.52556111103991, "learning_rate": 1.7412445122269683e-05, "loss": 0.5758601427078247, "step": 1548 }, { "epoch": 0.5459030837004405, "grad_norm": 1.3681670353760105, "learning_rate": 1.7408532982869573e-05, "loss": 0.753425121307373, "step": 1549 }, { "epoch": 0.5462555066079295, "grad_norm": 1.7147625296386437, "learning_rate": 1.7404618328503082e-05, "loss": 0.6954981088638306, "step": 1550 }, { "epoch": 0.5466079295154185, "grad_norm": 1.7209819459128521, "learning_rate": 1.7400701160499107e-05, "loss": 0.7608321905136108, "step": 1551 }, { "epoch": 0.5469603524229075, "grad_norm": 1.3289181862839086, "learning_rate": 1.7396781480187403e-05, "loss": 0.679731011390686, "step": 1552 }, { "epoch": 0.5473127753303965, "grad_norm": 1.547015119464835, "learning_rate": 1.7392859288898586e-05, "loss": 0.7101309299468994, "step": 1553 }, { "epoch": 0.5476651982378855, "grad_norm": 1.3483315531721025, "learning_rate": 1.7388934587964114e-05, "loss": 0.7975757122039795, "step": 1554 }, { "epoch": 0.5480176211453744, "grad_norm": 1.612241763199232, "learning_rate": 1.73850073787163e-05, "loss": 0.9132372140884399, "step": 1555 }, { "epoch": 0.5483700440528634, "grad_norm": 1.2910456692590726, "learning_rate": 1.7381077662488313e-05, "loss": 0.7375202178955078, "step": 1556 }, { "epoch": 0.5487224669603524, "grad_norm": 1.387618503105513, "learning_rate": 1.7377145440614165e-05, "loss": 0.7066434025764465, "step": 1557 }, { "epoch": 0.5490748898678414, "grad_norm": 1.3715154650071018, "learning_rate": 1.737321071442873e-05, "loss": 0.8217945098876953, "step": 1558 }, { "epoch": 0.5494273127753304, "grad_norm": 1.629646959297134, "learning_rate": 1.7369273485267716e-05, "loss": 0.6946529150009155, "step": 1559 }, { "epoch": 0.5497797356828193, "grad_norm": 1.525535566210846, "learning_rate": 1.7365333754467694e-05, "loss": 0.7671442627906799, "step": 1560 }, { "epoch": 0.5501321585903084, "grad_norm": 1.3988607690634036, "learning_rate": 1.736139152336608e-05, "loss": 0.7044692039489746, "step": 1561 }, { "epoch": 0.5504845814977973, "grad_norm": 1.368099127753493, "learning_rate": 1.735744679330114e-05, "loss": 0.6654937267303467, "step": 1562 }, { "epoch": 0.5508370044052864, "grad_norm": 1.2951614076743367, "learning_rate": 1.7353499565611986e-05, "loss": 0.6683400869369507, "step": 1563 }, { "epoch": 0.5511894273127753, "grad_norm": 1.236687311626723, "learning_rate": 1.734954984163858e-05, "loss": 0.6360758543014526, "step": 1564 }, { "epoch": 0.5515418502202644, "grad_norm": 1.3363599776517268, "learning_rate": 1.7345597622721727e-05, "loss": 0.6982365846633911, "step": 1565 }, { "epoch": 0.5518942731277533, "grad_norm": 1.559537817461735, "learning_rate": 1.7341642910203087e-05, "loss": 0.8011882305145264, "step": 1566 }, { "epoch": 0.5522466960352423, "grad_norm": 2.285492930360211, "learning_rate": 1.7337685705425156e-05, "loss": 0.8203347325325012, "step": 1567 }, { "epoch": 0.5525991189427313, "grad_norm": 1.2535380811715755, "learning_rate": 1.7333726009731288e-05, "loss": 0.653145432472229, "step": 1568 }, { "epoch": 0.5529515418502202, "grad_norm": 1.4482870728586805, "learning_rate": 1.7329763824465676e-05, "loss": 0.6527417302131653, "step": 1569 }, { "epoch": 0.5533039647577093, "grad_norm": 1.44377376134513, "learning_rate": 1.7325799150973353e-05, "loss": 0.6965492963790894, "step": 1570 }, { "epoch": 0.5536563876651982, "grad_norm": 1.64534067222521, "learning_rate": 1.7321831990600206e-05, "loss": 0.6779811382293701, "step": 1571 }, { "epoch": 0.5540088105726872, "grad_norm": 1.4062562776851213, "learning_rate": 1.731786234469296e-05, "loss": 0.733130931854248, "step": 1572 }, { "epoch": 0.5543612334801762, "grad_norm": 1.3711228848627866, "learning_rate": 1.7313890214599195e-05, "loss": 0.6482118964195251, "step": 1573 }, { "epoch": 0.5547136563876652, "grad_norm": 1.300564860417972, "learning_rate": 1.7309915601667312e-05, "loss": 0.5167185068130493, "step": 1574 }, { "epoch": 0.5550660792951542, "grad_norm": 1.5636119075248611, "learning_rate": 1.730593850724658e-05, "loss": 0.7172712087631226, "step": 1575 }, { "epoch": 0.5554185022026432, "grad_norm": 1.3031139662778384, "learning_rate": 1.730195893268709e-05, "loss": 0.6786075830459595, "step": 1576 }, { "epoch": 0.5557709251101322, "grad_norm": 1.272621756820605, "learning_rate": 1.7297976879339787e-05, "loss": 0.6823022365570068, "step": 1577 }, { "epoch": 0.5561233480176212, "grad_norm": 1.5517603954080275, "learning_rate": 1.7293992348556462e-05, "loss": 0.6882521510124207, "step": 1578 }, { "epoch": 0.5564757709251101, "grad_norm": 1.3633741314626187, "learning_rate": 1.7290005341689726e-05, "loss": 0.6028990745544434, "step": 1579 }, { "epoch": 0.5568281938325991, "grad_norm": 1.6493192591020644, "learning_rate": 1.728601586009305e-05, "loss": 0.7759981155395508, "step": 1580 }, { "epoch": 0.5571806167400881, "grad_norm": 1.4356044858306343, "learning_rate": 1.7282023905120743e-05, "loss": 0.7067322134971619, "step": 1581 }, { "epoch": 0.5575330396475771, "grad_norm": 1.6158791701222606, "learning_rate": 1.727802947812794e-05, "loss": 0.7972309589385986, "step": 1582 }, { "epoch": 0.5578854625550661, "grad_norm": 1.7662811513100274, "learning_rate": 1.7274032580470634e-05, "loss": 0.780463457107544, "step": 1583 }, { "epoch": 0.558237885462555, "grad_norm": 1.4053617141185595, "learning_rate": 1.7270033213505638e-05, "loss": 0.647217869758606, "step": 1584 }, { "epoch": 0.5585903083700441, "grad_norm": 1.3125952525291176, "learning_rate": 1.7266031378590624e-05, "loss": 0.6253752112388611, "step": 1585 }, { "epoch": 0.558942731277533, "grad_norm": 11.7060219187992, "learning_rate": 1.7262027077084083e-05, "loss": 0.8427211046218872, "step": 1586 }, { "epoch": 0.5592951541850221, "grad_norm": 1.344046568539196, "learning_rate": 1.7258020310345348e-05, "loss": 0.6763455867767334, "step": 1587 }, { "epoch": 0.559647577092511, "grad_norm": 1.593422172771999, "learning_rate": 1.72540110797346e-05, "loss": 0.7333850264549255, "step": 1588 }, { "epoch": 0.56, "grad_norm": 1.6040079500892586, "learning_rate": 1.7249999386612844e-05, "loss": 0.8572328090667725, "step": 1589 }, { "epoch": 0.560352422907489, "grad_norm": 1.5035390542036942, "learning_rate": 1.7245985232341923e-05, "loss": 0.7960183620452881, "step": 1590 }, { "epoch": 0.5607048458149779, "grad_norm": 1.484772075429922, "learning_rate": 1.7241968618284518e-05, "loss": 0.6750795841217041, "step": 1591 }, { "epoch": 0.561057268722467, "grad_norm": 1.9239116239416003, "learning_rate": 1.7237949545804145e-05, "loss": 0.7828525304794312, "step": 1592 }, { "epoch": 0.5614096916299559, "grad_norm": 1.4415449299886975, "learning_rate": 1.7233928016265158e-05, "loss": 0.7414604425430298, "step": 1593 }, { "epoch": 0.561762114537445, "grad_norm": 1.4483242479736562, "learning_rate": 1.7229904031032736e-05, "loss": 0.6853663921356201, "step": 1594 }, { "epoch": 0.5621145374449339, "grad_norm": 1.9067676423331832, "learning_rate": 1.72258775914729e-05, "loss": 0.7923493385314941, "step": 1595 }, { "epoch": 0.5624669603524229, "grad_norm": 1.6239202976244251, "learning_rate": 1.7221848698952496e-05, "loss": 0.6776527166366577, "step": 1596 }, { "epoch": 0.5628193832599119, "grad_norm": 1.4721879083766742, "learning_rate": 1.721781735483921e-05, "loss": 0.6036615371704102, "step": 1597 }, { "epoch": 0.5631718061674009, "grad_norm": 1.271294238053108, "learning_rate": 1.7213783560501564e-05, "loss": 0.7175784111022949, "step": 1598 }, { "epoch": 0.5635242290748899, "grad_norm": 1.609537856897954, "learning_rate": 1.7209747317308897e-05, "loss": 0.790808379650116, "step": 1599 }, { "epoch": 0.5638766519823789, "grad_norm": 1.211639696248482, "learning_rate": 1.7205708626631392e-05, "loss": 0.6230301856994629, "step": 1600 }, { "epoch": 0.5642290748898678, "grad_norm": 1.120326299832536, "learning_rate": 1.720166748984006e-05, "loss": 0.712124228477478, "step": 1601 }, { "epoch": 0.5645814977973568, "grad_norm": 1.1185092917911836, "learning_rate": 1.719762390830674e-05, "loss": 0.543883740901947, "step": 1602 }, { "epoch": 0.5649339207048458, "grad_norm": 1.3866183721479424, "learning_rate": 1.71935778834041e-05, "loss": 0.7619644999504089, "step": 1603 }, { "epoch": 0.5652863436123348, "grad_norm": 1.3869247346305908, "learning_rate": 1.718952941650564e-05, "loss": 0.6447019577026367, "step": 1604 }, { "epoch": 0.5656387665198238, "grad_norm": 1.4175373147115695, "learning_rate": 1.718547850898569e-05, "loss": 0.7254266738891602, "step": 1605 }, { "epoch": 0.5659911894273127, "grad_norm": 1.3621762521360266, "learning_rate": 1.7181425162219406e-05, "loss": 0.632878839969635, "step": 1606 }, { "epoch": 0.5663436123348018, "grad_norm": 1.3921274088807207, "learning_rate": 1.7177369377582776e-05, "loss": 0.7711806893348694, "step": 1607 }, { "epoch": 0.5666960352422907, "grad_norm": 1.1613347832568823, "learning_rate": 1.7173311156452607e-05, "loss": 0.6639282703399658, "step": 1608 }, { "epoch": 0.5670484581497798, "grad_norm": 1.4423463303361395, "learning_rate": 1.7169250500206544e-05, "loss": 0.6918407082557678, "step": 1609 }, { "epoch": 0.5674008810572687, "grad_norm": 2.283192950596924, "learning_rate": 1.716518741022305e-05, "loss": 0.6602861881256104, "step": 1610 }, { "epoch": 0.5677533039647578, "grad_norm": 1.401616641880741, "learning_rate": 1.7161121887881424e-05, "loss": 0.5853942632675171, "step": 1611 }, { "epoch": 0.5681057268722467, "grad_norm": 1.4206445071697613, "learning_rate": 1.7157053934561775e-05, "loss": 0.6793895959854126, "step": 1612 }, { "epoch": 0.5684581497797356, "grad_norm": 1.43055320760408, "learning_rate": 1.7152983551645054e-05, "loss": 0.7882634401321411, "step": 1613 }, { "epoch": 0.5688105726872247, "grad_norm": 1.4602086959676452, "learning_rate": 1.7148910740513023e-05, "loss": 0.6530553698539734, "step": 1614 }, { "epoch": 0.5691629955947136, "grad_norm": 1.2905537135464573, "learning_rate": 1.714483550254828e-05, "loss": 0.6405597925186157, "step": 1615 }, { "epoch": 0.5695154185022027, "grad_norm": 1.4236330365126968, "learning_rate": 1.714075783913424e-05, "loss": 0.7356796860694885, "step": 1616 }, { "epoch": 0.5698678414096916, "grad_norm": 1.3877607090316109, "learning_rate": 1.7136677751655142e-05, "loss": 0.7393465042114258, "step": 1617 }, { "epoch": 0.5702202643171806, "grad_norm": 1.6092126006316967, "learning_rate": 1.7132595241496045e-05, "loss": 0.7205296158790588, "step": 1618 }, { "epoch": 0.5705726872246696, "grad_norm": 1.291376266983401, "learning_rate": 1.7128510310042842e-05, "loss": 0.7359808683395386, "step": 1619 }, { "epoch": 0.5709251101321586, "grad_norm": 1.3759135749970453, "learning_rate": 1.712442295868224e-05, "loss": 0.7097065448760986, "step": 1620 }, { "epoch": 0.5712775330396476, "grad_norm": 1.3905917375530226, "learning_rate": 1.7120333188801756e-05, "loss": 0.66839599609375, "step": 1621 }, { "epoch": 0.5716299559471366, "grad_norm": 1.7035593754714837, "learning_rate": 1.7116241001789753e-05, "loss": 0.8373857736587524, "step": 1622 }, { "epoch": 0.5719823788546256, "grad_norm": 1.4514044348034505, "learning_rate": 1.7112146399035393e-05, "loss": 0.6405144333839417, "step": 1623 }, { "epoch": 0.5723348017621145, "grad_norm": 1.3537498495813336, "learning_rate": 1.710804938192867e-05, "loss": 0.622218906879425, "step": 1624 }, { "epoch": 0.5726872246696035, "grad_norm": 1.3235233015291856, "learning_rate": 1.710394995186039e-05, "loss": 0.6728596687316895, "step": 1625 }, { "epoch": 0.5730396475770925, "grad_norm": 1.457353775792826, "learning_rate": 1.7099848110222188e-05, "loss": 0.7749369144439697, "step": 1626 }, { "epoch": 0.5733920704845815, "grad_norm": 1.5414707611626788, "learning_rate": 1.7095743858406506e-05, "loss": 0.7230759859085083, "step": 1627 }, { "epoch": 0.5737444933920705, "grad_norm": 1.540981219180448, "learning_rate": 1.7091637197806614e-05, "loss": 0.8243547677993774, "step": 1628 }, { "epoch": 0.5740969162995595, "grad_norm": 1.38043003521811, "learning_rate": 1.708752812981659e-05, "loss": 0.5860315561294556, "step": 1629 }, { "epoch": 0.5744493392070484, "grad_norm": 1.6273061636094053, "learning_rate": 1.708341665583134e-05, "loss": 0.6623368859291077, "step": 1630 }, { "epoch": 0.5748017621145375, "grad_norm": 1.8119651381751527, "learning_rate": 1.7079302777246577e-05, "loss": 0.6467370986938477, "step": 1631 }, { "epoch": 0.5751541850220264, "grad_norm": 1.5119118761679917, "learning_rate": 1.707518649545884e-05, "loss": 0.6443271636962891, "step": 1632 }, { "epoch": 0.5755066079295155, "grad_norm": 1.3128080413830525, "learning_rate": 1.7071067811865477e-05, "loss": 0.6995208263397217, "step": 1633 }, { "epoch": 0.5758590308370044, "grad_norm": 1.4660315838841709, "learning_rate": 1.706694672786465e-05, "loss": 0.698627233505249, "step": 1634 }, { "epoch": 0.5762114537444933, "grad_norm": 1.3788458614759633, "learning_rate": 1.706282324485534e-05, "loss": 0.713565468788147, "step": 1635 }, { "epoch": 0.5765638766519824, "grad_norm": 1.4050651409728825, "learning_rate": 1.7058697364237342e-05, "loss": 0.7978894710540771, "step": 1636 }, { "epoch": 0.5769162995594713, "grad_norm": 1.374012134646938, "learning_rate": 1.7054569087411262e-05, "loss": 0.7361177206039429, "step": 1637 }, { "epoch": 0.5772687224669604, "grad_norm": 1.3640656150089683, "learning_rate": 1.705043841577853e-05, "loss": 0.5904364585876465, "step": 1638 }, { "epoch": 0.5776211453744493, "grad_norm": 1.4706525609098695, "learning_rate": 1.7046305350741365e-05, "loss": 0.7122133374214172, "step": 1639 }, { "epoch": 0.5779735682819384, "grad_norm": 1.5208627357939872, "learning_rate": 1.7042169893702826e-05, "loss": 0.6350806951522827, "step": 1640 }, { "epoch": 0.5783259911894273, "grad_norm": 1.4511692718944456, "learning_rate": 1.7038032046066767e-05, "loss": 0.6332669258117676, "step": 1641 }, { "epoch": 0.5786784140969163, "grad_norm": 1.415207402865657, "learning_rate": 1.7033891809237865e-05, "loss": 0.6645903587341309, "step": 1642 }, { "epoch": 0.5790308370044053, "grad_norm": 1.6697269215763402, "learning_rate": 1.7029749184621593e-05, "loss": 0.8156411051750183, "step": 1643 }, { "epoch": 0.5793832599118943, "grad_norm": 1.3789808786486863, "learning_rate": 1.7025604173624247e-05, "loss": 0.6778720617294312, "step": 1644 }, { "epoch": 0.5797356828193833, "grad_norm": 1.5882994058774447, "learning_rate": 1.702145677765293e-05, "loss": 0.6774875521659851, "step": 1645 }, { "epoch": 0.5800881057268722, "grad_norm": 1.7790432286964633, "learning_rate": 1.701730699811555e-05, "loss": 0.9239652156829834, "step": 1646 }, { "epoch": 0.5804405286343612, "grad_norm": 1.3647594896468807, "learning_rate": 1.701315483642083e-05, "loss": 0.6841437816619873, "step": 1647 }, { "epoch": 0.5807929515418502, "grad_norm": 1.7199469103031315, "learning_rate": 1.7009000293978308e-05, "loss": 0.7540775537490845, "step": 1648 }, { "epoch": 0.5811453744493392, "grad_norm": 1.0742597088843755, "learning_rate": 1.7004843372198306e-05, "loss": 0.5534735321998596, "step": 1649 }, { "epoch": 0.5814977973568282, "grad_norm": 1.326312979627632, "learning_rate": 1.7000684072491984e-05, "loss": 0.5398745536804199, "step": 1650 }, { "epoch": 0.5818502202643172, "grad_norm": 1.583833147288038, "learning_rate": 1.6996522396271285e-05, "loss": 0.7249305248260498, "step": 1651 }, { "epoch": 0.5822026431718061, "grad_norm": 2.3893378173132973, "learning_rate": 1.6992358344948976e-05, "loss": 0.819263219833374, "step": 1652 }, { "epoch": 0.5825550660792952, "grad_norm": 1.4489156713328724, "learning_rate": 1.6988191919938618e-05, "loss": 0.7421448826789856, "step": 1653 }, { "epoch": 0.5829074889867841, "grad_norm": 1.832209725536692, "learning_rate": 1.6984023122654584e-05, "loss": 0.7665672302246094, "step": 1654 }, { "epoch": 0.5832599118942732, "grad_norm": 1.390589552129084, "learning_rate": 1.697985195451205e-05, "loss": 0.7226558327674866, "step": 1655 }, { "epoch": 0.5836123348017621, "grad_norm": 1.5091001050977364, "learning_rate": 1.6975678416926995e-05, "loss": 0.6702080965042114, "step": 1656 }, { "epoch": 0.583964757709251, "grad_norm": 1.460442381139403, "learning_rate": 1.697150251131621e-05, "loss": 0.5843878984451294, "step": 1657 }, { "epoch": 0.5843171806167401, "grad_norm": 1.37517469234843, "learning_rate": 1.6967324239097287e-05, "loss": 0.707448422908783, "step": 1658 }, { "epoch": 0.584669603524229, "grad_norm": 1.8436282149841139, "learning_rate": 1.6963143601688615e-05, "loss": 0.7619093060493469, "step": 1659 }, { "epoch": 0.5850220264317181, "grad_norm": 1.5399166464925174, "learning_rate": 1.695896060050939e-05, "loss": 0.6550310850143433, "step": 1660 }, { "epoch": 0.585374449339207, "grad_norm": 1.6689625417691945, "learning_rate": 1.6954775236979616e-05, "loss": 0.7202504277229309, "step": 1661 }, { "epoch": 0.5857268722466961, "grad_norm": 1.4936106294591966, "learning_rate": 1.6950587512520085e-05, "loss": 0.7941907644271851, "step": 1662 }, { "epoch": 0.586079295154185, "grad_norm": 1.3939181305394832, "learning_rate": 1.6946397428552406e-05, "loss": 0.6349755525588989, "step": 1663 }, { "epoch": 0.586431718061674, "grad_norm": 1.4663377684980818, "learning_rate": 1.6942204986498978e-05, "loss": 0.6220123171806335, "step": 1664 }, { "epoch": 0.586784140969163, "grad_norm": 1.3729457618271874, "learning_rate": 1.693801018778301e-05, "loss": 0.6617282629013062, "step": 1665 }, { "epoch": 0.587136563876652, "grad_norm": 1.6745607368825612, "learning_rate": 1.6933813033828496e-05, "loss": 0.7424415349960327, "step": 1666 }, { "epoch": 0.587488986784141, "grad_norm": 1.4332695932293307, "learning_rate": 1.6929613526060254e-05, "loss": 0.7245291471481323, "step": 1667 }, { "epoch": 0.5878414096916299, "grad_norm": 1.7631957554533126, "learning_rate": 1.692541166590387e-05, "loss": 0.7037352323532104, "step": 1668 }, { "epoch": 0.588193832599119, "grad_norm": 1.563153866597813, "learning_rate": 1.6921207454785754e-05, "loss": 0.7452583312988281, "step": 1669 }, { "epoch": 0.5885462555066079, "grad_norm": 1.8223456889525438, "learning_rate": 1.6917000894133106e-05, "loss": 0.7773720026016235, "step": 1670 }, { "epoch": 0.5888986784140969, "grad_norm": 1.6663522681826546, "learning_rate": 1.6912791985373916e-05, "loss": 0.5820617079734802, "step": 1671 }, { "epoch": 0.5892511013215859, "grad_norm": 1.4638050818442514, "learning_rate": 1.6908580729936983e-05, "loss": 0.7513154745101929, "step": 1672 }, { "epoch": 0.5896035242290749, "grad_norm": 1.4916906386520274, "learning_rate": 1.6904367129251898e-05, "loss": 0.6741763949394226, "step": 1673 }, { "epoch": 0.5899559471365639, "grad_norm": 1.4430048165358413, "learning_rate": 1.690015118474904e-05, "loss": 0.7290149331092834, "step": 1674 }, { "epoch": 0.5903083700440529, "grad_norm": 1.431209358109114, "learning_rate": 1.6895932897859596e-05, "loss": 0.651113748550415, "step": 1675 }, { "epoch": 0.5906607929515418, "grad_norm": 1.5475090754915908, "learning_rate": 1.6891712270015546e-05, "loss": 0.8062121272087097, "step": 1676 }, { "epoch": 0.5910132158590309, "grad_norm": 1.6532405105419041, "learning_rate": 1.6887489302649657e-05, "loss": 0.7168683409690857, "step": 1677 }, { "epoch": 0.5913656387665198, "grad_norm": 1.5137609810465338, "learning_rate": 1.6883263997195497e-05, "loss": 0.6751970052719116, "step": 1678 }, { "epoch": 0.5917180616740088, "grad_norm": 1.3244566227591112, "learning_rate": 1.687903635508742e-05, "loss": 0.5176222324371338, "step": 1679 }, { "epoch": 0.5920704845814978, "grad_norm": 1.532290203616517, "learning_rate": 1.6874806377760587e-05, "loss": 0.605686366558075, "step": 1680 }, { "epoch": 0.5924229074889867, "grad_norm": 1.55000273332987, "learning_rate": 1.6870574066650945e-05, "loss": 0.6927961111068726, "step": 1681 }, { "epoch": 0.5927753303964758, "grad_norm": 1.7129667821490024, "learning_rate": 1.6866339423195223e-05, "loss": 0.7434122562408447, "step": 1682 }, { "epoch": 0.5931277533039647, "grad_norm": 1.6508583062240207, "learning_rate": 1.6862102448830956e-05, "loss": 0.5646539926528931, "step": 1683 }, { "epoch": 0.5934801762114538, "grad_norm": 1.6845514517525704, "learning_rate": 1.6857863144996464e-05, "loss": 0.6666921377182007, "step": 1684 }, { "epoch": 0.5938325991189427, "grad_norm": 1.7487162446625693, "learning_rate": 1.6853621513130857e-05, "loss": 0.6630325317382812, "step": 1685 }, { "epoch": 0.5941850220264318, "grad_norm": 1.6744610818707069, "learning_rate": 1.6849377554674042e-05, "loss": 0.6519981622695923, "step": 1686 }, { "epoch": 0.5945374449339207, "grad_norm": 1.523793082989738, "learning_rate": 1.6845131271066705e-05, "loss": 0.7958102822303772, "step": 1687 }, { "epoch": 0.5948898678414097, "grad_norm": 1.6258620864429363, "learning_rate": 1.6840882663750333e-05, "loss": 0.6136632561683655, "step": 1688 }, { "epoch": 0.5952422907488987, "grad_norm": 1.5696515726783535, "learning_rate": 1.683663173416719e-05, "loss": 0.6177657842636108, "step": 1689 }, { "epoch": 0.5955947136563877, "grad_norm": 1.3990187120155009, "learning_rate": 1.683237848376034e-05, "loss": 0.7489751577377319, "step": 1690 }, { "epoch": 0.5959471365638767, "grad_norm": 1.7037734397554838, "learning_rate": 1.6828122913973625e-05, "loss": 0.6749632954597473, "step": 1691 }, { "epoch": 0.5962995594713656, "grad_norm": 1.7564038851615957, "learning_rate": 1.682386502625168e-05, "loss": 0.6340545415878296, "step": 1692 }, { "epoch": 0.5966519823788546, "grad_norm": 1.2684465272191359, "learning_rate": 1.6819604822039924e-05, "loss": 0.6141117811203003, "step": 1693 }, { "epoch": 0.5970044052863436, "grad_norm": 1.6665435860950566, "learning_rate": 1.681534230278457e-05, "loss": 0.7937319874763489, "step": 1694 }, { "epoch": 0.5973568281938326, "grad_norm": 1.376760638279742, "learning_rate": 1.68110774699326e-05, "loss": 0.6196104288101196, "step": 1695 }, { "epoch": 0.5977092511013216, "grad_norm": 1.755256295612453, "learning_rate": 1.68068103249318e-05, "loss": 0.6856463551521301, "step": 1696 }, { "epoch": 0.5980616740088106, "grad_norm": 1.423055172614558, "learning_rate": 1.680254086923073e-05, "loss": 0.754359245300293, "step": 1697 }, { "epoch": 0.5984140969162995, "grad_norm": 1.5540819723583295, "learning_rate": 1.6798269104278738e-05, "loss": 0.6663862466812134, "step": 1698 }, { "epoch": 0.5987665198237886, "grad_norm": 1.8192134096199304, "learning_rate": 1.6793995031525955e-05, "loss": 0.7072615027427673, "step": 1699 }, { "epoch": 0.5991189427312775, "grad_norm": 1.3664015344189913, "learning_rate": 1.678971865242329e-05, "loss": 0.5722007751464844, "step": 1700 }, { "epoch": 0.5994713656387666, "grad_norm": 1.5146739460913152, "learning_rate": 1.6785439968422456e-05, "loss": 0.8254455327987671, "step": 1701 }, { "epoch": 0.5998237885462555, "grad_norm": 1.6221807995806083, "learning_rate": 1.678115898097592e-05, "loss": 0.5726041793823242, "step": 1702 }, { "epoch": 0.6001762114537444, "grad_norm": 1.7173506198717712, "learning_rate": 1.6776875691536946e-05, "loss": 0.6480926275253296, "step": 1703 }, { "epoch": 0.6005286343612335, "grad_norm": 1.801703791100917, "learning_rate": 1.677259010155958e-05, "loss": 0.6469742059707642, "step": 1704 }, { "epoch": 0.6008810572687224, "grad_norm": 1.6534691770392222, "learning_rate": 1.6768302212498647e-05, "loss": 0.814565896987915, "step": 1705 }, { "epoch": 0.6012334801762115, "grad_norm": 1.6212192399903926, "learning_rate": 1.6764012025809745e-05, "loss": 0.7063060402870178, "step": 1706 }, { "epoch": 0.6015859030837004, "grad_norm": 1.2993416112883407, "learning_rate": 1.6759719542949268e-05, "loss": 0.6523685455322266, "step": 1707 }, { "epoch": 0.6019383259911895, "grad_norm": 1.7291371377992661, "learning_rate": 1.6755424765374378e-05, "loss": 0.7361165285110474, "step": 1708 }, { "epoch": 0.6022907488986784, "grad_norm": 1.1307969866596985, "learning_rate": 1.6751127694543012e-05, "loss": 0.45241934061050415, "step": 1709 }, { "epoch": 0.6026431718061674, "grad_norm": 1.3734078208692269, "learning_rate": 1.6746828331913903e-05, "loss": 0.6610431671142578, "step": 1710 }, { "epoch": 0.6029955947136564, "grad_norm": 1.6659887779271019, "learning_rate": 1.674252667894654e-05, "loss": 0.7572601437568665, "step": 1711 }, { "epoch": 0.6033480176211454, "grad_norm": 1.3828996049540105, "learning_rate": 1.6738222737101205e-05, "loss": 0.7021572589874268, "step": 1712 }, { "epoch": 0.6037004405286344, "grad_norm": 1.5581462402658262, "learning_rate": 1.6733916507838952e-05, "loss": 0.7742347121238708, "step": 1713 }, { "epoch": 0.6040528634361233, "grad_norm": 1.5666267075277038, "learning_rate": 1.6729607992621613e-05, "loss": 0.6453407406806946, "step": 1714 }, { "epoch": 0.6044052863436123, "grad_norm": 1.279025328652212, "learning_rate": 1.6725297192911793e-05, "loss": 0.7004555463790894, "step": 1715 }, { "epoch": 0.6047577092511013, "grad_norm": 1.3482721305547676, "learning_rate": 1.6720984110172875e-05, "loss": 0.6979051232337952, "step": 1716 }, { "epoch": 0.6051101321585903, "grad_norm": 1.5059245296578512, "learning_rate": 1.671666874586902e-05, "loss": 0.6387851238250732, "step": 1717 }, { "epoch": 0.6054625550660793, "grad_norm": 1.5397561778856637, "learning_rate": 1.671235110146515e-05, "loss": 0.9083811044692993, "step": 1718 }, { "epoch": 0.6058149779735683, "grad_norm": 1.637790853716126, "learning_rate": 1.6708031178426984e-05, "loss": 0.747002363204956, "step": 1719 }, { "epoch": 0.6061674008810573, "grad_norm": 1.6617583077406621, "learning_rate": 1.6703708978220986e-05, "loss": 0.7553372383117676, "step": 1720 }, { "epoch": 0.6065198237885463, "grad_norm": 1.72002611544435, "learning_rate": 1.669938450231442e-05, "loss": 0.762795090675354, "step": 1721 }, { "epoch": 0.6068722466960352, "grad_norm": 1.3894206198813077, "learning_rate": 1.669505775217531e-05, "loss": 0.739936113357544, "step": 1722 }, { "epoch": 0.6072246696035243, "grad_norm": 1.625344781935558, "learning_rate": 1.6690728729272456e-05, "loss": 0.8439112305641174, "step": 1723 }, { "epoch": 0.6075770925110132, "grad_norm": 1.5345011506472854, "learning_rate": 1.6686397435075416e-05, "loss": 0.6144756078720093, "step": 1724 }, { "epoch": 0.6079295154185022, "grad_norm": 1.3674442510472364, "learning_rate": 1.6682063871054534e-05, "loss": 0.569161057472229, "step": 1725 }, { "epoch": 0.6082819383259912, "grad_norm": 1.6372827589624075, "learning_rate": 1.6677728038680926e-05, "loss": 0.7523979544639587, "step": 1726 }, { "epoch": 0.6086343612334801, "grad_norm": 1.453986649514636, "learning_rate": 1.6673389939426463e-05, "loss": 0.6394520401954651, "step": 1727 }, { "epoch": 0.6089867841409692, "grad_norm": 1.358198647287584, "learning_rate": 1.66690495747638e-05, "loss": 0.5975633859634399, "step": 1728 }, { "epoch": 0.6093392070484581, "grad_norm": 1.6192297143942058, "learning_rate": 1.666470694616636e-05, "loss": 0.736790657043457, "step": 1729 }, { "epoch": 0.6096916299559472, "grad_norm": 1.4234241508654442, "learning_rate": 1.6660362055108316e-05, "loss": 0.7693831920623779, "step": 1730 }, { "epoch": 0.6100440528634361, "grad_norm": 1.8032471376275176, "learning_rate": 1.665601490306464e-05, "loss": 0.7322608232498169, "step": 1731 }, { "epoch": 0.6103964757709252, "grad_norm": 1.3709677099617412, "learning_rate": 1.6651665491511043e-05, "loss": 0.6478679180145264, "step": 1732 }, { "epoch": 0.6107488986784141, "grad_norm": 1.8838571148858527, "learning_rate": 1.6647313821924022e-05, "loss": 0.7125877141952515, "step": 1733 }, { "epoch": 0.6111013215859031, "grad_norm": 1.5594770538222507, "learning_rate": 1.664295989578083e-05, "loss": 0.8999321460723877, "step": 1734 }, { "epoch": 0.6114537444933921, "grad_norm": 1.618421596120734, "learning_rate": 1.663860371455949e-05, "loss": 0.6908334493637085, "step": 1735 }, { "epoch": 0.611806167400881, "grad_norm": 1.5552403174407248, "learning_rate": 1.663424527973879e-05, "loss": 0.6708767414093018, "step": 1736 }, { "epoch": 0.61215859030837, "grad_norm": 1.4907630752773764, "learning_rate": 1.6629884592798283e-05, "loss": 0.6991565823554993, "step": 1737 }, { "epoch": 0.612511013215859, "grad_norm": 1.430459100414143, "learning_rate": 1.6625521655218287e-05, "loss": 0.6224193572998047, "step": 1738 }, { "epoch": 0.612863436123348, "grad_norm": 1.6355889531807317, "learning_rate": 1.662115646847988e-05, "loss": 0.701459527015686, "step": 1739 }, { "epoch": 0.613215859030837, "grad_norm": 1.508424771304017, "learning_rate": 1.6616789034064914e-05, "loss": 0.784063458442688, "step": 1740 }, { "epoch": 0.613568281938326, "grad_norm": 1.4868333492675876, "learning_rate": 1.661241935345599e-05, "loss": 0.7604146003723145, "step": 1741 }, { "epoch": 0.613920704845815, "grad_norm": 1.7090188741959023, "learning_rate": 1.6608047428136482e-05, "loss": 0.6347941160202026, "step": 1742 }, { "epoch": 0.614273127753304, "grad_norm": 1.6487656059998825, "learning_rate": 1.6603673259590524e-05, "loss": 0.7559434175491333, "step": 1743 }, { "epoch": 0.6146255506607929, "grad_norm": 1.5969979245345363, "learning_rate": 1.6599296849303007e-05, "loss": 0.742524266242981, "step": 1744 }, { "epoch": 0.614977973568282, "grad_norm": 1.2238633556789393, "learning_rate": 1.6594918198759586e-05, "loss": 0.697594165802002, "step": 1745 }, { "epoch": 0.6153303964757709, "grad_norm": 1.4536023257551807, "learning_rate": 1.659053730944668e-05, "loss": 0.7876765131950378, "step": 1746 }, { "epoch": 0.6156828193832599, "grad_norm": 1.489887595585156, "learning_rate": 1.658615418285146e-05, "loss": 0.7514386177062988, "step": 1747 }, { "epoch": 0.6160352422907489, "grad_norm": 1.6935500501856253, "learning_rate": 1.658176882046187e-05, "loss": 0.6220899820327759, "step": 1748 }, { "epoch": 0.6163876651982378, "grad_norm": 1.9395284146525182, "learning_rate": 1.6577381223766592e-05, "loss": 0.7376539707183838, "step": 1749 }, { "epoch": 0.6167400881057269, "grad_norm": 1.6373866531670291, "learning_rate": 1.6572991394255084e-05, "loss": 0.8296281099319458, "step": 1750 }, { "epoch": 0.6170925110132158, "grad_norm": 1.545978766740828, "learning_rate": 1.656859933341756e-05, "loss": 0.7316757440567017, "step": 1751 }, { "epoch": 0.6174449339207049, "grad_norm": 1.5280854263636194, "learning_rate": 1.6564205042744986e-05, "loss": 0.6933871507644653, "step": 1752 }, { "epoch": 0.6177973568281938, "grad_norm": 1.890269396017501, "learning_rate": 1.655980852372908e-05, "loss": 0.6835601329803467, "step": 1753 }, { "epoch": 0.6181497797356829, "grad_norm": 1.3967466693425752, "learning_rate": 1.655540977786233e-05, "loss": 0.6752027869224548, "step": 1754 }, { "epoch": 0.6185022026431718, "grad_norm": 1.4944496246124994, "learning_rate": 1.6551008806637976e-05, "loss": 0.6092851758003235, "step": 1755 }, { "epoch": 0.6188546255506608, "grad_norm": 1.3266652259646856, "learning_rate": 1.6546605611550008e-05, "loss": 0.682563066482544, "step": 1756 }, { "epoch": 0.6192070484581498, "grad_norm": 1.5302981352911342, "learning_rate": 1.654220019409317e-05, "loss": 0.8674311637878418, "step": 1757 }, { "epoch": 0.6195594713656387, "grad_norm": 1.4437314589210788, "learning_rate": 1.6537792555762966e-05, "loss": 0.7209165096282959, "step": 1758 }, { "epoch": 0.6199118942731278, "grad_norm": 1.5958855115050472, "learning_rate": 1.6533382698055655e-05, "loss": 0.7795991897583008, "step": 1759 }, { "epoch": 0.6202643171806167, "grad_norm": 1.6392261912532398, "learning_rate": 1.6528970622468245e-05, "loss": 0.6749448776245117, "step": 1760 }, { "epoch": 0.6206167400881057, "grad_norm": 1.5291165267411688, "learning_rate": 1.6524556330498494e-05, "loss": 0.9127920866012573, "step": 1761 }, { "epoch": 0.6209691629955947, "grad_norm": 1.5402491362904795, "learning_rate": 1.6520139823644922e-05, "loss": 0.6224071979522705, "step": 1762 }, { "epoch": 0.6213215859030837, "grad_norm": 1.426673111398807, "learning_rate": 1.6515721103406798e-05, "loss": 0.6955251693725586, "step": 1763 }, { "epoch": 0.6216740088105727, "grad_norm": 1.7187740007003602, "learning_rate": 1.6511300171284132e-05, "loss": 0.676613986492157, "step": 1764 }, { "epoch": 0.6220264317180617, "grad_norm": 1.4024924612217573, "learning_rate": 1.65068770287777e-05, "loss": 0.7482033967971802, "step": 1765 }, { "epoch": 0.6223788546255506, "grad_norm": 1.4659804586317469, "learning_rate": 1.6502451677389015e-05, "loss": 0.6019684076309204, "step": 1766 }, { "epoch": 0.6227312775330397, "grad_norm": 1.419796458872072, "learning_rate": 1.649802411862035e-05, "loss": 0.6796068549156189, "step": 1767 }, { "epoch": 0.6230837004405286, "grad_norm": 2.234008541241949, "learning_rate": 1.6493594353974724e-05, "loss": 0.6351302862167358, "step": 1768 }, { "epoch": 0.6234361233480176, "grad_norm": 1.4257561009443, "learning_rate": 1.6489162384955906e-05, "loss": 0.6093732714653015, "step": 1769 }, { "epoch": 0.6237885462555066, "grad_norm": 1.842168854503522, "learning_rate": 1.6484728213068405e-05, "loss": 0.8181271553039551, "step": 1770 }, { "epoch": 0.6241409691629956, "grad_norm": 1.821206401126196, "learning_rate": 1.6480291839817488e-05, "loss": 0.7093993425369263, "step": 1771 }, { "epoch": 0.6244933920704846, "grad_norm": 1.416340976430299, "learning_rate": 1.6475853266709165e-05, "loss": 0.6895081996917725, "step": 1772 }, { "epoch": 0.6248458149779735, "grad_norm": 1.5970315552720198, "learning_rate": 1.6471412495250195e-05, "loss": 0.6706013679504395, "step": 1773 }, { "epoch": 0.6251982378854626, "grad_norm": 1.5170788749866242, "learning_rate": 1.6466969526948082e-05, "loss": 0.6700015664100647, "step": 1774 }, { "epoch": 0.6255506607929515, "grad_norm": 1.5173815641058028, "learning_rate": 1.6462524363311072e-05, "loss": 0.6591087579727173, "step": 1775 }, { "epoch": 0.6259030837004406, "grad_norm": 1.6219345446237772, "learning_rate": 1.6458077005848164e-05, "loss": 0.7775006294250488, "step": 1776 }, { "epoch": 0.6262555066079295, "grad_norm": 1.6260525304572828, "learning_rate": 1.6453627456069093e-05, "loss": 0.8459682464599609, "step": 1777 }, { "epoch": 0.6266079295154185, "grad_norm": 1.4031571304990242, "learning_rate": 1.6449175715484346e-05, "loss": 0.6536898612976074, "step": 1778 }, { "epoch": 0.6269603524229075, "grad_norm": 1.5129603585000657, "learning_rate": 1.6444721785605148e-05, "loss": 0.7543610334396362, "step": 1779 }, { "epoch": 0.6273127753303964, "grad_norm": 1.6228520645077271, "learning_rate": 1.6440265667943474e-05, "loss": 0.7416362762451172, "step": 1780 }, { "epoch": 0.6276651982378855, "grad_norm": 1.4583654660578542, "learning_rate": 1.6435807364012035e-05, "loss": 0.5505499839782715, "step": 1781 }, { "epoch": 0.6280176211453744, "grad_norm": 1.5252426453600672, "learning_rate": 1.6431346875324284e-05, "loss": 0.792723536491394, "step": 1782 }, { "epoch": 0.6283700440528635, "grad_norm": 1.3655475423968058, "learning_rate": 1.6426884203394416e-05, "loss": 0.6313158273696899, "step": 1783 }, { "epoch": 0.6287224669603524, "grad_norm": 1.6057168635576118, "learning_rate": 1.642241934973738e-05, "loss": 0.6168874502182007, "step": 1784 }, { "epoch": 0.6290748898678414, "grad_norm": 1.78997265433784, "learning_rate": 1.6417952315868845e-05, "loss": 0.6995766162872314, "step": 1785 }, { "epoch": 0.6294273127753304, "grad_norm": 1.4835625331683349, "learning_rate": 1.641348310330523e-05, "loss": 0.8046826124191284, "step": 1786 }, { "epoch": 0.6297797356828194, "grad_norm": 1.4892920408023869, "learning_rate": 1.6409011713563697e-05, "loss": 0.7227291464805603, "step": 1787 }, { "epoch": 0.6301321585903084, "grad_norm": 1.4682105257113767, "learning_rate": 1.6404538148162145e-05, "loss": 0.6463631391525269, "step": 1788 }, { "epoch": 0.6304845814977974, "grad_norm": 2.4977643907634, "learning_rate": 1.640006240861921e-05, "loss": 0.7473348379135132, "step": 1789 }, { "epoch": 0.6308370044052863, "grad_norm": 1.4291329366827183, "learning_rate": 1.6395584496454263e-05, "loss": 0.7311505079269409, "step": 1790 }, { "epoch": 0.6311894273127753, "grad_norm": 1.5618530036111458, "learning_rate": 1.639110441318742e-05, "loss": 0.7259535789489746, "step": 1791 }, { "epoch": 0.6315418502202643, "grad_norm": 1.515515721890048, "learning_rate": 1.6386622160339522e-05, "loss": 0.5777252912521362, "step": 1792 }, { "epoch": 0.6318942731277533, "grad_norm": 1.3190322559386176, "learning_rate": 1.638213773943216e-05, "loss": 0.5510598421096802, "step": 1793 }, { "epoch": 0.6322466960352423, "grad_norm": 1.5085803548323364, "learning_rate": 1.637765115198766e-05, "loss": 0.6448229551315308, "step": 1794 }, { "epoch": 0.6325991189427312, "grad_norm": 1.5827276696724286, "learning_rate": 1.6373162399529067e-05, "loss": 0.7359289526939392, "step": 1795 }, { "epoch": 0.6329515418502203, "grad_norm": 1.5346140091491929, "learning_rate": 1.6368671483580185e-05, "loss": 0.616656482219696, "step": 1796 }, { "epoch": 0.6333039647577092, "grad_norm": 1.4291822350961465, "learning_rate": 1.6364178405665534e-05, "loss": 0.5966289043426514, "step": 1797 }, { "epoch": 0.6336563876651983, "grad_norm": 1.8727626569458464, "learning_rate": 1.6359683167310375e-05, "loss": 0.7475985288619995, "step": 1798 }, { "epoch": 0.6340088105726872, "grad_norm": 1.4494645750595028, "learning_rate": 1.63551857700407e-05, "loss": 0.6030765771865845, "step": 1799 }, { "epoch": 0.6343612334801763, "grad_norm": 1.6637248682130477, "learning_rate": 1.6350686215383237e-05, "loss": 0.6193016171455383, "step": 1800 }, { "epoch": 0.6347136563876652, "grad_norm": 1.3604775956740969, "learning_rate": 1.6346184504865442e-05, "loss": 0.6404513120651245, "step": 1801 }, { "epoch": 0.6350660792951542, "grad_norm": 1.5539318450371893, "learning_rate": 1.6341680640015515e-05, "loss": 0.8453506231307983, "step": 1802 }, { "epoch": 0.6354185022026432, "grad_norm": 1.3642622033336096, "learning_rate": 1.6337174622362366e-05, "loss": 0.6094445586204529, "step": 1803 }, { "epoch": 0.6357709251101321, "grad_norm": 1.5112522647253264, "learning_rate": 1.6332666453435653e-05, "loss": 0.7352159023284912, "step": 1804 }, { "epoch": 0.6361233480176212, "grad_norm": 1.4529963307650198, "learning_rate": 1.632815613476576e-05, "loss": 0.7395339608192444, "step": 1805 }, { "epoch": 0.6364757709251101, "grad_norm": 1.4350925789909401, "learning_rate": 1.63236436678838e-05, "loss": 0.7246927618980408, "step": 1806 }, { "epoch": 0.6368281938325991, "grad_norm": 1.3653208723694477, "learning_rate": 1.6319129054321616e-05, "loss": 0.6913329362869263, "step": 1807 }, { "epoch": 0.6371806167400881, "grad_norm": 1.9893098285493216, "learning_rate": 1.6314612295611772e-05, "loss": 0.6410515308380127, "step": 1808 }, { "epoch": 0.6375330396475771, "grad_norm": 2.6583918764324665, "learning_rate": 1.6310093393287574e-05, "loss": 0.690910816192627, "step": 1809 }, { "epoch": 0.6378854625550661, "grad_norm": 1.4623649413484192, "learning_rate": 1.6305572348883044e-05, "loss": 0.6520562171936035, "step": 1810 }, { "epoch": 0.6382378854625551, "grad_norm": 1.6850706181935027, "learning_rate": 1.630104916393294e-05, "loss": 0.6966608166694641, "step": 1811 }, { "epoch": 0.638590308370044, "grad_norm": 1.7161033790648312, "learning_rate": 1.6296523839972743e-05, "loss": 0.826806902885437, "step": 1812 }, { "epoch": 0.6389427312775331, "grad_norm": 1.431569634617566, "learning_rate": 1.6291996378538653e-05, "loss": 0.6695773601531982, "step": 1813 }, { "epoch": 0.639295154185022, "grad_norm": 1.4264708644101765, "learning_rate": 1.6287466781167607e-05, "loss": 0.5725491046905518, "step": 1814 }, { "epoch": 0.639647577092511, "grad_norm": 1.2779233324378096, "learning_rate": 1.628293504939727e-05, "loss": 0.5543544292449951, "step": 1815 }, { "epoch": 0.64, "grad_norm": 3.2997728941963564, "learning_rate": 1.6278401184766007e-05, "loss": 0.6964641809463501, "step": 1816 }, { "epoch": 0.640352422907489, "grad_norm": 1.3065245679172277, "learning_rate": 1.6273865188812935e-05, "loss": 0.675407886505127, "step": 1817 }, { "epoch": 0.640704845814978, "grad_norm": 1.4883059032141013, "learning_rate": 1.626932706307788e-05, "loss": 0.6304433345794678, "step": 1818 }, { "epoch": 0.6410572687224669, "grad_norm": 1.5529882690454875, "learning_rate": 1.62647868091014e-05, "loss": 0.7432112693786621, "step": 1819 }, { "epoch": 0.641409691629956, "grad_norm": 1.5761551228008874, "learning_rate": 1.6260244428424763e-05, "loss": 0.730377197265625, "step": 1820 }, { "epoch": 0.6417621145374449, "grad_norm": 1.7239403694554825, "learning_rate": 1.6255699922589968e-05, "loss": 0.694229006767273, "step": 1821 }, { "epoch": 0.642114537444934, "grad_norm": 1.5664915948077012, "learning_rate": 1.6251153293139735e-05, "loss": 0.7284739017486572, "step": 1822 }, { "epoch": 0.6424669603524229, "grad_norm": 1.4047714992661522, "learning_rate": 1.6246604541617507e-05, "loss": 0.6028950214385986, "step": 1823 }, { "epoch": 0.642819383259912, "grad_norm": 1.65079248713073, "learning_rate": 1.6242053669567432e-05, "loss": 0.6776808500289917, "step": 1824 }, { "epoch": 0.6431718061674009, "grad_norm": 1.7695857292474644, "learning_rate": 1.6237500678534396e-05, "loss": 0.7743366956710815, "step": 1825 }, { "epoch": 0.6435242290748898, "grad_norm": 1.594351471613888, "learning_rate": 1.6232945570064e-05, "loss": 0.6356723308563232, "step": 1826 }, { "epoch": 0.6438766519823789, "grad_norm": 1.4846113103688028, "learning_rate": 1.622838834570256e-05, "loss": 0.7356402277946472, "step": 1827 }, { "epoch": 0.6442290748898678, "grad_norm": 1.455165750941624, "learning_rate": 1.622382900699711e-05, "loss": 0.7639342546463013, "step": 1828 }, { "epoch": 0.6445814977973569, "grad_norm": 2.0823946019481987, "learning_rate": 1.6219267555495407e-05, "loss": 0.6969513297080994, "step": 1829 }, { "epoch": 0.6449339207048458, "grad_norm": 1.418146430885783, "learning_rate": 1.621470399274592e-05, "loss": 0.7532765865325928, "step": 1830 }, { "epoch": 0.6452863436123348, "grad_norm": 1.3893974330709622, "learning_rate": 1.6210138320297835e-05, "loss": 0.5801100730895996, "step": 1831 }, { "epoch": 0.6456387665198238, "grad_norm": 1.5780391931120195, "learning_rate": 1.6205570539701056e-05, "loss": 0.8006102442741394, "step": 1832 }, { "epoch": 0.6459911894273128, "grad_norm": 1.4094927188728377, "learning_rate": 1.6201000652506203e-05, "loss": 0.6507089138031006, "step": 1833 }, { "epoch": 0.6463436123348018, "grad_norm": 1.9684758989320281, "learning_rate": 1.619642866026461e-05, "loss": 0.7407999634742737, "step": 1834 }, { "epoch": 0.6466960352422908, "grad_norm": 1.4160609898798358, "learning_rate": 1.619185456452833e-05, "loss": 0.6964670419692993, "step": 1835 }, { "epoch": 0.6470484581497797, "grad_norm": 1.6614634508995256, "learning_rate": 1.6187278366850122e-05, "loss": 0.7095489501953125, "step": 1836 }, { "epoch": 0.6474008810572687, "grad_norm": 2.0391949894277017, "learning_rate": 1.6182700068783463e-05, "loss": 0.6968166828155518, "step": 1837 }, { "epoch": 0.6477533039647577, "grad_norm": 1.3206477384834772, "learning_rate": 1.617811967188254e-05, "loss": 0.7745821475982666, "step": 1838 }, { "epoch": 0.6481057268722467, "grad_norm": 1.4803456865319338, "learning_rate": 1.6173537177702266e-05, "loss": 0.7071934938430786, "step": 1839 }, { "epoch": 0.6484581497797357, "grad_norm": 1.7225763324537737, "learning_rate": 1.6168952587798242e-05, "loss": 0.6481701135635376, "step": 1840 }, { "epoch": 0.6488105726872246, "grad_norm": 1.4447543914645467, "learning_rate": 1.6164365903726805e-05, "loss": 0.6349890232086182, "step": 1841 }, { "epoch": 0.6491629955947137, "grad_norm": 1.3913908457554178, "learning_rate": 1.6159777127044982e-05, "loss": 0.6067368388175964, "step": 1842 }, { "epoch": 0.6495154185022026, "grad_norm": 1.3943413375617566, "learning_rate": 1.6155186259310523e-05, "loss": 0.7170778512954712, "step": 1843 }, { "epoch": 0.6498678414096917, "grad_norm": 1.4309397568408155, "learning_rate": 1.6150593302081888e-05, "loss": 0.5623376965522766, "step": 1844 }, { "epoch": 0.6502202643171806, "grad_norm": 1.442096873601557, "learning_rate": 1.6145998256918238e-05, "loss": 0.7295233607292175, "step": 1845 }, { "epoch": 0.6505726872246697, "grad_norm": 1.513681766461532, "learning_rate": 1.6141401125379454e-05, "loss": 0.6991151571273804, "step": 1846 }, { "epoch": 0.6509251101321586, "grad_norm": 1.568060173563952, "learning_rate": 1.6136801909026113e-05, "loss": 0.7553545236587524, "step": 1847 }, { "epoch": 0.6512775330396475, "grad_norm": 1.560177534517688, "learning_rate": 1.613220060941951e-05, "loss": 0.8280071020126343, "step": 1848 }, { "epoch": 0.6516299559471366, "grad_norm": 1.3846780543862842, "learning_rate": 1.6127597228121636e-05, "loss": 0.662299633026123, "step": 1849 }, { "epoch": 0.6519823788546255, "grad_norm": 1.519733781984336, "learning_rate": 1.6122991766695206e-05, "loss": 0.6493197679519653, "step": 1850 }, { "epoch": 0.6523348017621146, "grad_norm": 1.5074834442694671, "learning_rate": 1.6118384226703623e-05, "loss": 0.5910629034042358, "step": 1851 }, { "epoch": 0.6526872246696035, "grad_norm": 1.5082942143966174, "learning_rate": 1.611377460971101e-05, "loss": 0.7124426364898682, "step": 1852 }, { "epoch": 0.6530396475770925, "grad_norm": 1.6734021483912949, "learning_rate": 1.610916291728218e-05, "loss": 0.6081063747406006, "step": 1853 }, { "epoch": 0.6533920704845815, "grad_norm": 1.5485445677219123, "learning_rate": 1.6104549150982666e-05, "loss": 0.7536673545837402, "step": 1854 }, { "epoch": 0.6537444933920705, "grad_norm": 1.5239612944966212, "learning_rate": 1.6099933312378695e-05, "loss": 0.6514976024627686, "step": 1855 }, { "epoch": 0.6540969162995595, "grad_norm": 1.3951117738157057, "learning_rate": 1.6095315403037205e-05, "loss": 0.6595193147659302, "step": 1856 }, { "epoch": 0.6544493392070485, "grad_norm": 1.5562205804379312, "learning_rate": 1.6090695424525826e-05, "loss": 0.666920006275177, "step": 1857 }, { "epoch": 0.6548017621145374, "grad_norm": 1.5350434119319913, "learning_rate": 1.6086073378412902e-05, "loss": 0.5984979271888733, "step": 1858 }, { "epoch": 0.6551541850220264, "grad_norm": 1.8541188470544154, "learning_rate": 1.608144926626747e-05, "loss": 0.8021191358566284, "step": 1859 }, { "epoch": 0.6555066079295154, "grad_norm": 1.5029675710659876, "learning_rate": 1.6076823089659272e-05, "loss": 0.7368075847625732, "step": 1860 }, { "epoch": 0.6558590308370044, "grad_norm": 1.596711606351331, "learning_rate": 1.6072194850158755e-05, "loss": 0.7923766374588013, "step": 1861 }, { "epoch": 0.6562114537444934, "grad_norm": 1.6332800469997777, "learning_rate": 1.606756454933706e-05, "loss": 0.6907824873924255, "step": 1862 }, { "epoch": 0.6565638766519823, "grad_norm": 1.5674543537069574, "learning_rate": 1.606293218876603e-05, "loss": 0.7366634607315063, "step": 1863 }, { "epoch": 0.6569162995594714, "grad_norm": 1.7550517656533429, "learning_rate": 1.6058297770018208e-05, "loss": 0.7166022658348083, "step": 1864 }, { "epoch": 0.6572687224669603, "grad_norm": 1.5153527205809505, "learning_rate": 1.6053661294666833e-05, "loss": 0.6969404220581055, "step": 1865 }, { "epoch": 0.6576211453744494, "grad_norm": 1.5681332930444218, "learning_rate": 1.6049022764285846e-05, "loss": 0.7182974815368652, "step": 1866 }, { "epoch": 0.6579735682819383, "grad_norm": 2.620263422686914, "learning_rate": 1.6044382180449886e-05, "loss": 0.7469301819801331, "step": 1867 }, { "epoch": 0.6583259911894274, "grad_norm": 1.458082221775431, "learning_rate": 1.603973954473428e-05, "loss": 0.7097122073173523, "step": 1868 }, { "epoch": 0.6586784140969163, "grad_norm": 1.3404337000381439, "learning_rate": 1.6035094858715065e-05, "loss": 0.6907291412353516, "step": 1869 }, { "epoch": 0.6590308370044052, "grad_norm": 1.5576579616406543, "learning_rate": 1.6030448123968963e-05, "loss": 0.6259130239486694, "step": 1870 }, { "epoch": 0.6593832599118943, "grad_norm": 1.6431810286043311, "learning_rate": 1.6025799342073397e-05, "loss": 0.6948051452636719, "step": 1871 }, { "epoch": 0.6597356828193832, "grad_norm": 1.3540961323396474, "learning_rate": 1.602114851460648e-05, "loss": 0.7037572264671326, "step": 1872 }, { "epoch": 0.6600881057268723, "grad_norm": 1.565352238933419, "learning_rate": 1.6016495643147036e-05, "loss": 0.7728864550590515, "step": 1873 }, { "epoch": 0.6604405286343612, "grad_norm": 1.4345290675539004, "learning_rate": 1.601184072927456e-05, "loss": 0.7782067060470581, "step": 1874 }, { "epoch": 0.6607929515418502, "grad_norm": 1.4505913839056241, "learning_rate": 1.6007183774569246e-05, "loss": 0.6168591976165771, "step": 1875 }, { "epoch": 0.6611453744493392, "grad_norm": 1.6465062301007323, "learning_rate": 1.6002524780611995e-05, "loss": 0.702346920967102, "step": 1876 }, { "epoch": 0.6614977973568282, "grad_norm": 1.6478258582343996, "learning_rate": 1.5997863748984384e-05, "loss": 0.6084239482879639, "step": 1877 }, { "epoch": 0.6618502202643172, "grad_norm": 1.5841429013244157, "learning_rate": 1.5993200681268696e-05, "loss": 0.8307315707206726, "step": 1878 }, { "epoch": 0.6622026431718062, "grad_norm": 1.8073980879357947, "learning_rate": 1.5988535579047888e-05, "loss": 0.6465811729431152, "step": 1879 }, { "epoch": 0.6625550660792952, "grad_norm": 1.5593829827457022, "learning_rate": 1.598386844390562e-05, "loss": 0.71415114402771, "step": 1880 }, { "epoch": 0.6629074889867841, "grad_norm": 6.602062472303997, "learning_rate": 1.5979199277426243e-05, "loss": 0.7135012149810791, "step": 1881 }, { "epoch": 0.6632599118942731, "grad_norm": 1.584805815321856, "learning_rate": 1.597452808119479e-05, "loss": 0.840306282043457, "step": 1882 }, { "epoch": 0.6636123348017621, "grad_norm": 1.454651140369818, "learning_rate": 1.596985485679699e-05, "loss": 0.622429609298706, "step": 1883 }, { "epoch": 0.6639647577092511, "grad_norm": 1.5798478269154124, "learning_rate": 1.5965179605819248e-05, "loss": 0.6505612134933472, "step": 1884 }, { "epoch": 0.66431718061674, "grad_norm": 1.4292089389404006, "learning_rate": 1.5960502329848683e-05, "loss": 0.7665247917175293, "step": 1885 }, { "epoch": 0.6646696035242291, "grad_norm": 1.614107737492675, "learning_rate": 1.5955823030473068e-05, "loss": 0.7780051231384277, "step": 1886 }, { "epoch": 0.665022026431718, "grad_norm": 1.4074097920809756, "learning_rate": 1.5951141709280886e-05, "loss": 0.6311650276184082, "step": 1887 }, { "epoch": 0.6653744493392071, "grad_norm": 1.287734360896639, "learning_rate": 1.5946458367861302e-05, "loss": 0.7126712799072266, "step": 1888 }, { "epoch": 0.665726872246696, "grad_norm": 1.3823278268773909, "learning_rate": 1.5941773007804165e-05, "loss": 0.6979397535324097, "step": 1889 }, { "epoch": 0.6660792951541851, "grad_norm": 1.5067230035216896, "learning_rate": 1.5937085630700003e-05, "loss": 0.7065495252609253, "step": 1890 }, { "epoch": 0.666431718061674, "grad_norm": 1.373677820269664, "learning_rate": 1.593239623814004e-05, "loss": 0.6157221794128418, "step": 1891 }, { "epoch": 0.6667841409691629, "grad_norm": 1.6157271272896285, "learning_rate": 1.5927704831716177e-05, "loss": 0.6835625171661377, "step": 1892 }, { "epoch": 0.667136563876652, "grad_norm": 1.5002309814069255, "learning_rate": 1.5923011413021e-05, "loss": 0.6416822671890259, "step": 1893 }, { "epoch": 0.6674889867841409, "grad_norm": 1.4507514621746327, "learning_rate": 1.5918315983647782e-05, "loss": 0.7307168245315552, "step": 1894 }, { "epoch": 0.66784140969163, "grad_norm": 1.3321086634513644, "learning_rate": 1.5913618545190468e-05, "loss": 0.5464824438095093, "step": 1895 }, { "epoch": 0.6681938325991189, "grad_norm": 1.544912001907108, "learning_rate": 1.5908919099243698e-05, "loss": 0.6634502410888672, "step": 1896 }, { "epoch": 0.668546255506608, "grad_norm": 1.2985703589965545, "learning_rate": 1.5904217647402788e-05, "loss": 0.719158411026001, "step": 1897 }, { "epoch": 0.6688986784140969, "grad_norm": 1.5083721998375157, "learning_rate": 1.5899514191263733e-05, "loss": 0.7547527551651001, "step": 1898 }, { "epoch": 0.6692511013215859, "grad_norm": 1.6226125781851348, "learning_rate": 1.5894808732423207e-05, "loss": 0.7549886703491211, "step": 1899 }, { "epoch": 0.6696035242290749, "grad_norm": 1.5327056521201368, "learning_rate": 1.589010127247857e-05, "loss": 0.7107831239700317, "step": 1900 }, { "epoch": 0.6699559471365639, "grad_norm": 1.5679371113552734, "learning_rate": 1.588539181302786e-05, "loss": 0.855078935623169, "step": 1901 }, { "epoch": 0.6703083700440529, "grad_norm": 1.4970896726818788, "learning_rate": 1.5880680355669792e-05, "loss": 0.8235266208648682, "step": 1902 }, { "epoch": 0.6706607929515418, "grad_norm": 1.339674008175079, "learning_rate": 1.587596690200375e-05, "loss": 0.6060166358947754, "step": 1903 }, { "epoch": 0.6710132158590308, "grad_norm": 1.4603163291197105, "learning_rate": 1.5871251453629817e-05, "loss": 0.7325272560119629, "step": 1904 }, { "epoch": 0.6713656387665198, "grad_norm": 1.5470128203990354, "learning_rate": 1.586653401214873e-05, "loss": 0.674901008605957, "step": 1905 }, { "epoch": 0.6717180616740088, "grad_norm": 1.3515017914848853, "learning_rate": 1.5861814579161928e-05, "loss": 0.767164945602417, "step": 1906 }, { "epoch": 0.6720704845814978, "grad_norm": 1.3633425183694836, "learning_rate": 1.5857093156271496e-05, "loss": 0.5691556930541992, "step": 1907 }, { "epoch": 0.6724229074889868, "grad_norm": 1.3106038540183678, "learning_rate": 1.585236974508022e-05, "loss": 0.6885931491851807, "step": 1908 }, { "epoch": 0.6727753303964757, "grad_norm": 1.143239709830434, "learning_rate": 1.5847644347191545e-05, "loss": 0.6227391958236694, "step": 1909 }, { "epoch": 0.6731277533039648, "grad_norm": 1.4883434470080177, "learning_rate": 1.5842916964209602e-05, "loss": 0.6084527969360352, "step": 1910 }, { "epoch": 0.6734801762114537, "grad_norm": 1.7178691294348742, "learning_rate": 1.583818759773919e-05, "loss": 0.7001935243606567, "step": 1911 }, { "epoch": 0.6738325991189428, "grad_norm": 1.684468384573203, "learning_rate": 1.5833456249385774e-05, "loss": 0.8263465166091919, "step": 1912 }, { "epoch": 0.6741850220264317, "grad_norm": 1.6085564780466834, "learning_rate": 1.582872292075551e-05, "loss": 0.662792444229126, "step": 1913 }, { "epoch": 0.6745374449339208, "grad_norm": 1.7464203558320361, "learning_rate": 1.582398761345521e-05, "loss": 0.7093051075935364, "step": 1914 }, { "epoch": 0.6748898678414097, "grad_norm": 1.4885122105608484, "learning_rate": 1.5819250329092364e-05, "loss": 0.7264106273651123, "step": 1915 }, { "epoch": 0.6752422907488986, "grad_norm": 1.5383309179609377, "learning_rate": 1.581451106927513e-05, "loss": 0.6561543345451355, "step": 1916 }, { "epoch": 0.6755947136563877, "grad_norm": 1.634971670239321, "learning_rate": 1.580976983561235e-05, "loss": 0.6563262939453125, "step": 1917 }, { "epoch": 0.6759471365638766, "grad_norm": 1.2931579342976025, "learning_rate": 1.5805026629713512e-05, "loss": 0.5224509239196777, "step": 1918 }, { "epoch": 0.6762995594713657, "grad_norm": 1.4840746720603137, "learning_rate": 1.5800281453188793e-05, "loss": 0.6565898656845093, "step": 1919 }, { "epoch": 0.6766519823788546, "grad_norm": 1.4375600407888718, "learning_rate": 1.5795534307649032e-05, "loss": 0.7954028844833374, "step": 1920 }, { "epoch": 0.6770044052863436, "grad_norm": 1.3454762773409146, "learning_rate": 1.579078519470574e-05, "loss": 0.6624404788017273, "step": 1921 }, { "epoch": 0.6773568281938326, "grad_norm": 1.5514355338443828, "learning_rate": 1.5786034115971083e-05, "loss": 0.840311586856842, "step": 1922 }, { "epoch": 0.6777092511013216, "grad_norm": 1.5163172495660509, "learning_rate": 1.578128107305792e-05, "loss": 0.6967859864234924, "step": 1923 }, { "epoch": 0.6780616740088106, "grad_norm": 1.0735596232953704, "learning_rate": 1.5776526067579746e-05, "loss": 0.5295379161834717, "step": 1924 }, { "epoch": 0.6784140969162996, "grad_norm": 1.8118747234451476, "learning_rate": 1.5771769101150752e-05, "loss": 0.6758475303649902, "step": 1925 }, { "epoch": 0.6787665198237885, "grad_norm": 1.3510918406813899, "learning_rate": 1.576701017538577e-05, "loss": 0.6891785860061646, "step": 1926 }, { "epoch": 0.6791189427312775, "grad_norm": 1.4115910497948105, "learning_rate": 1.5762249291900304e-05, "loss": 0.6507086157798767, "step": 1927 }, { "epoch": 0.6794713656387665, "grad_norm": 1.4168935733459347, "learning_rate": 1.5757486452310537e-05, "loss": 0.6220029592514038, "step": 1928 }, { "epoch": 0.6798237885462555, "grad_norm": 1.5134078284046213, "learning_rate": 1.5752721658233294e-05, "loss": 0.7742874622344971, "step": 1929 }, { "epoch": 0.6801762114537445, "grad_norm": 1.513809055671425, "learning_rate": 1.5747954911286085e-05, "loss": 0.6895851492881775, "step": 1930 }, { "epoch": 0.6805286343612335, "grad_norm": 1.6367265924041048, "learning_rate": 1.5743186213087062e-05, "loss": 0.71466064453125, "step": 1931 }, { "epoch": 0.6808810572687225, "grad_norm": 1.506916023064254, "learning_rate": 1.5738415565255056e-05, "loss": 0.6465627551078796, "step": 1932 }, { "epoch": 0.6812334801762114, "grad_norm": 1.3796886447957644, "learning_rate": 1.5733642969409553e-05, "loss": 0.7592962980270386, "step": 1933 }, { "epoch": 0.6815859030837005, "grad_norm": 1.662185742102518, "learning_rate": 1.57288684271707e-05, "loss": 0.7641816735267639, "step": 1934 }, { "epoch": 0.6819383259911894, "grad_norm": 1.5600426648231815, "learning_rate": 1.5724091940159306e-05, "loss": 0.7015130519866943, "step": 1935 }, { "epoch": 0.6822907488986785, "grad_norm": 1.5031237824980206, "learning_rate": 1.5719313509996833e-05, "loss": 0.7851461172103882, "step": 1936 }, { "epoch": 0.6826431718061674, "grad_norm": 1.5670991097913773, "learning_rate": 1.571453313830542e-05, "loss": 0.7924813628196716, "step": 1937 }, { "epoch": 0.6829955947136563, "grad_norm": 1.3030215719290177, "learning_rate": 1.570975082670785e-05, "loss": 0.6082741022109985, "step": 1938 }, { "epoch": 0.6833480176211454, "grad_norm": 1.5878638287998994, "learning_rate": 1.5704966576827563e-05, "loss": 0.7307756543159485, "step": 1939 }, { "epoch": 0.6837004405286343, "grad_norm": 1.421111197077357, "learning_rate": 1.570018039028867e-05, "loss": 0.6877273917198181, "step": 1940 }, { "epoch": 0.6840528634361234, "grad_norm": 3.222041323215856, "learning_rate": 1.5695392268715934e-05, "loss": 0.7702943086624146, "step": 1941 }, { "epoch": 0.6844052863436123, "grad_norm": 1.3182333231384877, "learning_rate": 1.569060221373477e-05, "loss": 0.6576820611953735, "step": 1942 }, { "epoch": 0.6847577092511014, "grad_norm": 1.6178003008675335, "learning_rate": 1.568581022697125e-05, "loss": 0.6605322360992432, "step": 1943 }, { "epoch": 0.6851101321585903, "grad_norm": 1.5479637201173908, "learning_rate": 1.568101631005211e-05, "loss": 0.8065364360809326, "step": 1944 }, { "epoch": 0.6854625550660793, "grad_norm": 1.5909483515555374, "learning_rate": 1.5676220464604726e-05, "loss": 0.8018748164176941, "step": 1945 }, { "epoch": 0.6858149779735683, "grad_norm": 1.4496461628107289, "learning_rate": 1.567142269225715e-05, "loss": 0.6114683151245117, "step": 1946 }, { "epoch": 0.6861674008810573, "grad_norm": 1.4567709922330223, "learning_rate": 1.566662299463807e-05, "loss": 0.8470789194107056, "step": 1947 }, { "epoch": 0.6865198237885463, "grad_norm": 1.4716494157627575, "learning_rate": 1.5661821373376837e-05, "loss": 0.7133561372756958, "step": 1948 }, { "epoch": 0.6868722466960352, "grad_norm": 1.6398709503866558, "learning_rate": 1.5657017830103448e-05, "loss": 0.9101625084877014, "step": 1949 }, { "epoch": 0.6872246696035242, "grad_norm": 1.8312595153810016, "learning_rate": 1.565221236644856e-05, "loss": 0.7395101189613342, "step": 1950 }, { "epoch": 0.6875770925110132, "grad_norm": 1.4532682115054107, "learning_rate": 1.5647404984043474e-05, "loss": 0.7421061992645264, "step": 1951 }, { "epoch": 0.6879295154185022, "grad_norm": 1.4495130982943423, "learning_rate": 1.5642595684520154e-05, "loss": 0.8744432330131531, "step": 1952 }, { "epoch": 0.6882819383259912, "grad_norm": 1.6475850419823541, "learning_rate": 1.56377844695112e-05, "loss": 0.8043868541717529, "step": 1953 }, { "epoch": 0.6886343612334802, "grad_norm": 1.444538108927131, "learning_rate": 1.5632971340649873e-05, "loss": 0.6231396198272705, "step": 1954 }, { "epoch": 0.6889867841409691, "grad_norm": 1.3765988847280666, "learning_rate": 1.562815629957008e-05, "loss": 0.7791434526443481, "step": 1955 }, { "epoch": 0.6893392070484582, "grad_norm": 1.2135950275511538, "learning_rate": 1.5623339347906383e-05, "loss": 0.5652475357055664, "step": 1956 }, { "epoch": 0.6896916299559471, "grad_norm": 1.4607959644694648, "learning_rate": 1.561852048729398e-05, "loss": 0.611067533493042, "step": 1957 }, { "epoch": 0.6900440528634362, "grad_norm": 1.2569255893474116, "learning_rate": 1.5613699719368724e-05, "loss": 0.7580389976501465, "step": 1958 }, { "epoch": 0.6903964757709251, "grad_norm": 1.516048041026883, "learning_rate": 1.560887704576712e-05, "loss": 0.6841205954551697, "step": 1959 }, { "epoch": 0.690748898678414, "grad_norm": 1.7678860610521125, "learning_rate": 1.5604052468126315e-05, "loss": 0.7600575089454651, "step": 1960 }, { "epoch": 0.6911013215859031, "grad_norm": 1.458096987341084, "learning_rate": 1.55992259880841e-05, "loss": 0.7547114491462708, "step": 1961 }, { "epoch": 0.691453744493392, "grad_norm": 1.3490975617996133, "learning_rate": 1.5594397607278912e-05, "loss": 0.6917474865913391, "step": 1962 }, { "epoch": 0.6918061674008811, "grad_norm": 1.378212312699651, "learning_rate": 1.5589567327349845e-05, "loss": 0.6820487976074219, "step": 1963 }, { "epoch": 0.69215859030837, "grad_norm": 1.4687305992297937, "learning_rate": 1.5584735149936628e-05, "loss": 0.6513597965240479, "step": 1964 }, { "epoch": 0.6925110132158591, "grad_norm": 1.4807223837447299, "learning_rate": 1.5579901076679625e-05, "loss": 0.668257474899292, "step": 1965 }, { "epoch": 0.692863436123348, "grad_norm": 1.5130451892313703, "learning_rate": 1.5575065109219864e-05, "loss": 0.7600705623626709, "step": 1966 }, { "epoch": 0.693215859030837, "grad_norm": 1.5218611988458295, "learning_rate": 1.5570227249198993e-05, "loss": 0.8140011429786682, "step": 1967 }, { "epoch": 0.693568281938326, "grad_norm": 1.1438716908088957, "learning_rate": 1.556538749825933e-05, "loss": 0.610436201095581, "step": 1968 }, { "epoch": 0.693920704845815, "grad_norm": 1.7706616264872619, "learning_rate": 1.556054585804381e-05, "loss": 0.7745693922042847, "step": 1969 }, { "epoch": 0.694273127753304, "grad_norm": 1.4076568647110412, "learning_rate": 1.5555702330196024e-05, "loss": 0.5809592008590698, "step": 1970 }, { "epoch": 0.6946255506607929, "grad_norm": 1.220751429593537, "learning_rate": 1.5550856916360195e-05, "loss": 0.6354515552520752, "step": 1971 }, { "epoch": 0.694977973568282, "grad_norm": 1.4513364815061058, "learning_rate": 1.5546009618181194e-05, "loss": 0.8076149225234985, "step": 1972 }, { "epoch": 0.6953303964757709, "grad_norm": 1.6702158357132753, "learning_rate": 1.5541160437304524e-05, "loss": 0.7553249597549438, "step": 1973 }, { "epoch": 0.6956828193832599, "grad_norm": 1.4495619596653457, "learning_rate": 1.5536309375376332e-05, "loss": 0.6109169125556946, "step": 1974 }, { "epoch": 0.6960352422907489, "grad_norm": 1.4052818449921982, "learning_rate": 1.5531456434043404e-05, "loss": 0.8184436559677124, "step": 1975 }, { "epoch": 0.6963876651982379, "grad_norm": 1.3611746850672197, "learning_rate": 1.5526601614953164e-05, "loss": 0.6823909878730774, "step": 1976 }, { "epoch": 0.6967400881057269, "grad_norm": 1.3254402340100906, "learning_rate": 1.5521744919753668e-05, "loss": 0.6669045090675354, "step": 1977 }, { "epoch": 0.6970925110132159, "grad_norm": 1.7752129025350782, "learning_rate": 1.5516886350093617e-05, "loss": 0.8054187297821045, "step": 1978 }, { "epoch": 0.6974449339207048, "grad_norm": 1.6379915816078137, "learning_rate": 1.551202590762234e-05, "loss": 0.7089184522628784, "step": 1979 }, { "epoch": 0.6977973568281939, "grad_norm": 1.5207382048575195, "learning_rate": 1.5507163593989804e-05, "loss": 0.7908214330673218, "step": 1980 }, { "epoch": 0.6981497797356828, "grad_norm": 1.454323961299799, "learning_rate": 1.5502299410846626e-05, "loss": 0.8859039545059204, "step": 1981 }, { "epoch": 0.6985022026431718, "grad_norm": 1.5085321450966587, "learning_rate": 1.549743335984403e-05, "loss": 0.7156866788864136, "step": 1982 }, { "epoch": 0.6988546255506608, "grad_norm": 1.4496904801370623, "learning_rate": 1.5492565442633894e-05, "loss": 0.6158934831619263, "step": 1983 }, { "epoch": 0.6992070484581497, "grad_norm": 1.5453977055484032, "learning_rate": 1.548769566086873e-05, "loss": 0.6689192056655884, "step": 1984 }, { "epoch": 0.6995594713656388, "grad_norm": 1.4591630403591411, "learning_rate": 1.548282401620167e-05, "loss": 0.6695841550827026, "step": 1985 }, { "epoch": 0.6999118942731277, "grad_norm": 1.6161480882103554, "learning_rate": 1.5477950510286488e-05, "loss": 0.7196098566055298, "step": 1986 }, { "epoch": 0.7002643171806168, "grad_norm": 1.5261033448052712, "learning_rate": 1.5473075144777586e-05, "loss": 0.7811123132705688, "step": 1987 }, { "epoch": 0.7006167400881057, "grad_norm": 1.3902237132074229, "learning_rate": 1.5468197921330006e-05, "loss": 0.6341326236724854, "step": 1988 }, { "epoch": 0.7009691629955948, "grad_norm": 1.4052051959904983, "learning_rate": 1.5463318841599408e-05, "loss": 0.6344352960586548, "step": 1989 }, { "epoch": 0.7013215859030837, "grad_norm": 1.5015659436227353, "learning_rate": 1.5458437907242084e-05, "loss": 0.6708072423934937, "step": 1990 }, { "epoch": 0.7016740088105727, "grad_norm": 1.4551372124338164, "learning_rate": 1.5453555119914963e-05, "loss": 0.7018578052520752, "step": 1991 }, { "epoch": 0.7020264317180617, "grad_norm": 1.4651591378979865, "learning_rate": 1.5448670481275604e-05, "loss": 0.6966190338134766, "step": 1992 }, { "epoch": 0.7023788546255506, "grad_norm": 1.2815956936347872, "learning_rate": 1.5443783992982182e-05, "loss": 0.6280171871185303, "step": 1993 }, { "epoch": 0.7027312775330397, "grad_norm": 1.451492070117077, "learning_rate": 1.5438895656693512e-05, "loss": 0.6644559502601624, "step": 1994 }, { "epoch": 0.7030837004405286, "grad_norm": 1.5030450433681415, "learning_rate": 1.543400547406903e-05, "loss": 0.776411771774292, "step": 1995 }, { "epoch": 0.7034361233480176, "grad_norm": 1.428531901666428, "learning_rate": 1.5429113446768805e-05, "loss": 0.6353679895401001, "step": 1996 }, { "epoch": 0.7037885462555066, "grad_norm": 1.468487936335314, "learning_rate": 1.5424219576453526e-05, "loss": 0.686774492263794, "step": 1997 }, { "epoch": 0.7041409691629956, "grad_norm": 1.2525683766202464, "learning_rate": 1.5419323864784508e-05, "loss": 0.5296701192855835, "step": 1998 }, { "epoch": 0.7044933920704846, "grad_norm": 1.264413948230812, "learning_rate": 1.5414426313423692e-05, "loss": 0.6246802806854248, "step": 1999 }, { "epoch": 0.7048458149779736, "grad_norm": 1.44172793688486, "learning_rate": 1.5409526924033646e-05, "loss": 0.6633912920951843, "step": 2000 }, { "epoch": 0.7051982378854625, "grad_norm": 1.720413855985522, "learning_rate": 1.540462569827756e-05, "loss": 0.7324577569961548, "step": 2001 }, { "epoch": 0.7055506607929516, "grad_norm": 1.6372387419200998, "learning_rate": 1.539972263781925e-05, "loss": 0.7988085746765137, "step": 2002 }, { "epoch": 0.7059030837004405, "grad_norm": 1.4528481393218415, "learning_rate": 1.539481774432315e-05, "loss": 0.6761256456375122, "step": 2003 }, { "epoch": 0.7062555066079295, "grad_norm": 1.6101005409981786, "learning_rate": 1.538991101945431e-05, "loss": 0.6647740006446838, "step": 2004 }, { "epoch": 0.7066079295154185, "grad_norm": 1.5047715708456952, "learning_rate": 1.538500246487843e-05, "loss": 0.7111536860466003, "step": 2005 }, { "epoch": 0.7069603524229074, "grad_norm": 1.8533704165409681, "learning_rate": 1.5380092082261797e-05, "loss": 0.7395933270454407, "step": 2006 }, { "epoch": 0.7073127753303965, "grad_norm": 1.4630720873509298, "learning_rate": 1.5375179873271335e-05, "loss": 0.6158996820449829, "step": 2007 }, { "epoch": 0.7076651982378854, "grad_norm": 1.4746770670226905, "learning_rate": 1.537026583957459e-05, "loss": 0.7259848117828369, "step": 2008 }, { "epoch": 0.7080176211453745, "grad_norm": 1.6674311554666914, "learning_rate": 1.5365349982839723e-05, "loss": 0.8370928764343262, "step": 2009 }, { "epoch": 0.7083700440528634, "grad_norm": 1.3618230849109776, "learning_rate": 1.536043230473551e-05, "loss": 0.6041784882545471, "step": 2010 }, { "epoch": 0.7087224669603525, "grad_norm": 1.4112680073946362, "learning_rate": 1.535551280693135e-05, "loss": 0.688548743724823, "step": 2011 }, { "epoch": 0.7090748898678414, "grad_norm": 1.6056330275270763, "learning_rate": 1.5350591491097265e-05, "loss": 0.573681652545929, "step": 2012 }, { "epoch": 0.7094273127753304, "grad_norm": 2.0956667904129636, "learning_rate": 1.5345668358903886e-05, "loss": 0.6919670104980469, "step": 2013 }, { "epoch": 0.7097797356828194, "grad_norm": 1.6440284625605202, "learning_rate": 1.534074341202246e-05, "loss": 0.6693999767303467, "step": 2014 }, { "epoch": 0.7101321585903083, "grad_norm": 1.5023686452775393, "learning_rate": 1.533581665212486e-05, "loss": 0.7204093337059021, "step": 2015 }, { "epoch": 0.7104845814977974, "grad_norm": 1.7353596990699613, "learning_rate": 1.5330888080883555e-05, "loss": 0.6196314096450806, "step": 2016 }, { "epoch": 0.7108370044052863, "grad_norm": 1.4190743094269347, "learning_rate": 1.5325957699971657e-05, "loss": 0.7292872071266174, "step": 2017 }, { "epoch": 0.7111894273127753, "grad_norm": 1.7578012075664924, "learning_rate": 1.532102551106287e-05, "loss": 0.7514410018920898, "step": 2018 }, { "epoch": 0.7115418502202643, "grad_norm": 1.329552917806312, "learning_rate": 1.531609151583152e-05, "loss": 0.7683345079421997, "step": 2019 }, { "epoch": 0.7118942731277533, "grad_norm": 1.8323846391695044, "learning_rate": 1.5311155715952536e-05, "loss": 0.6994156837463379, "step": 2020 }, { "epoch": 0.7122466960352423, "grad_norm": 1.3407977210543047, "learning_rate": 1.5306218113101482e-05, "loss": 0.5530328750610352, "step": 2021 }, { "epoch": 0.7125991189427313, "grad_norm": 1.6814720781682417, "learning_rate": 1.530127870895451e-05, "loss": 0.6126301884651184, "step": 2022 }, { "epoch": 0.7129515418502202, "grad_norm": 1.9618212705640916, "learning_rate": 1.5296337505188403e-05, "loss": 0.7514982223510742, "step": 2023 }, { "epoch": 0.7133039647577093, "grad_norm": 1.742411408925072, "learning_rate": 1.529139450348054e-05, "loss": 0.7087191939353943, "step": 2024 }, { "epoch": 0.7136563876651982, "grad_norm": 1.3195305972662899, "learning_rate": 1.5286449705508914e-05, "loss": 0.5713562965393066, "step": 2025 }, { "epoch": 0.7140088105726872, "grad_norm": 1.3621779724967453, "learning_rate": 1.5281503112952136e-05, "loss": 0.6796679496765137, "step": 2026 }, { "epoch": 0.7143612334801762, "grad_norm": 1.8247081007192694, "learning_rate": 1.5276554727489415e-05, "loss": 0.7902421355247498, "step": 2027 }, { "epoch": 0.7147136563876652, "grad_norm": 1.3608050254188053, "learning_rate": 1.527160455080058e-05, "loss": 0.6645491123199463, "step": 2028 }, { "epoch": 0.7150660792951542, "grad_norm": 1.489658346292968, "learning_rate": 1.5266652584566056e-05, "loss": 0.6077255606651306, "step": 2029 }, { "epoch": 0.7154185022026431, "grad_norm": 1.412193602346091, "learning_rate": 1.5261698830466888e-05, "loss": 0.6219078302383423, "step": 2030 }, { "epoch": 0.7157709251101322, "grad_norm": 1.280704281307457, "learning_rate": 1.5256743290184713e-05, "loss": 0.5895035266876221, "step": 2031 }, { "epoch": 0.7161233480176211, "grad_norm": 1.497416305314063, "learning_rate": 1.5251785965401786e-05, "loss": 0.6735520958900452, "step": 2032 }, { "epoch": 0.7164757709251102, "grad_norm": 1.353147232010895, "learning_rate": 1.524682685780097e-05, "loss": 0.6212488412857056, "step": 2033 }, { "epoch": 0.7168281938325991, "grad_norm": 1.5786628078958613, "learning_rate": 1.524186596906572e-05, "loss": 0.7181172966957092, "step": 2034 }, { "epoch": 0.7171806167400882, "grad_norm": 3.1301800941750906, "learning_rate": 1.5236903300880107e-05, "loss": 0.7156587839126587, "step": 2035 }, { "epoch": 0.7175330396475771, "grad_norm": 1.513371130481219, "learning_rate": 1.52319388549288e-05, "loss": 0.6989034414291382, "step": 2036 }, { "epoch": 0.7178854625550661, "grad_norm": 1.5183441818080943, "learning_rate": 1.5226972632897079e-05, "loss": 0.7224982976913452, "step": 2037 }, { "epoch": 0.7182378854625551, "grad_norm": 1.5033480023563544, "learning_rate": 1.522200463647082e-05, "loss": 0.6871547698974609, "step": 2038 }, { "epoch": 0.718590308370044, "grad_norm": 1.5898527901911406, "learning_rate": 1.5217034867336498e-05, "loss": 0.725049614906311, "step": 2039 }, { "epoch": 0.718942731277533, "grad_norm": 2.079980258079047, "learning_rate": 1.5212063327181197e-05, "loss": 0.7105863094329834, "step": 2040 }, { "epoch": 0.719295154185022, "grad_norm": 1.4720898042575539, "learning_rate": 1.5207090017692605e-05, "loss": 0.5823827981948853, "step": 2041 }, { "epoch": 0.719647577092511, "grad_norm": 1.9166232714289464, "learning_rate": 1.5202114940559005e-05, "loss": 0.7087944746017456, "step": 2042 }, { "epoch": 0.72, "grad_norm": 1.40676061171607, "learning_rate": 1.5197138097469275e-05, "loss": 0.6678824424743652, "step": 2043 }, { "epoch": 0.720352422907489, "grad_norm": 1.8181396920642288, "learning_rate": 1.5192159490112904e-05, "loss": 0.7318846583366394, "step": 2044 }, { "epoch": 0.720704845814978, "grad_norm": 1.4972370605408583, "learning_rate": 1.5187179120179969e-05, "loss": 0.7245825529098511, "step": 2045 }, { "epoch": 0.721057268722467, "grad_norm": 1.8554569851295908, "learning_rate": 1.5182196989361155e-05, "loss": 0.7691583633422852, "step": 2046 }, { "epoch": 0.7214096916299559, "grad_norm": 1.8926959198228865, "learning_rate": 1.517721309934774e-05, "loss": 0.7961187362670898, "step": 2047 }, { "epoch": 0.721762114537445, "grad_norm": 1.4465824812635413, "learning_rate": 1.51722274518316e-05, "loss": 0.7163759469985962, "step": 2048 }, { "epoch": 0.7221145374449339, "grad_norm": 1.5931659235074929, "learning_rate": 1.51672400485052e-05, "loss": 0.6807754039764404, "step": 2049 }, { "epoch": 0.7224669603524229, "grad_norm": 1.6629043788678177, "learning_rate": 1.516225089106162e-05, "loss": 0.7026433348655701, "step": 2050 }, { "epoch": 0.7228193832599119, "grad_norm": 1.5979782761024863, "learning_rate": 1.5157259981194514e-05, "loss": 0.8230476379394531, "step": 2051 }, { "epoch": 0.7231718061674008, "grad_norm": 1.7451468269512191, "learning_rate": 1.5152267320598149e-05, "loss": 0.6466805934906006, "step": 2052 }, { "epoch": 0.7235242290748899, "grad_norm": 1.441654513994546, "learning_rate": 1.5147272910967368e-05, "loss": 0.7203368544578552, "step": 2053 }, { "epoch": 0.7238766519823788, "grad_norm": 1.3552926542352444, "learning_rate": 1.5142276753997627e-05, "loss": 0.6455702781677246, "step": 2054 }, { "epoch": 0.7242290748898679, "grad_norm": 1.4569594560235375, "learning_rate": 1.5137278851384958e-05, "loss": 0.609260082244873, "step": 2055 }, { "epoch": 0.7245814977973568, "grad_norm": 1.8083723333355965, "learning_rate": 1.5132279204826e-05, "loss": 0.8320673704147339, "step": 2056 }, { "epoch": 0.7249339207048459, "grad_norm": 1.5846751172626037, "learning_rate": 1.512727781601797e-05, "loss": 0.8497718572616577, "step": 2057 }, { "epoch": 0.7252863436123348, "grad_norm": 1.3523103900088498, "learning_rate": 1.5122274686658695e-05, "loss": 0.6398370265960693, "step": 2058 }, { "epoch": 0.7256387665198238, "grad_norm": 1.4475161405549521, "learning_rate": 1.511726981844657e-05, "loss": 0.7562476396560669, "step": 2059 }, { "epoch": 0.7259911894273128, "grad_norm": 1.8369611551341436, "learning_rate": 1.51122632130806e-05, "loss": 0.7948570251464844, "step": 2060 }, { "epoch": 0.7263436123348017, "grad_norm": 1.9057892039367437, "learning_rate": 1.5107254872260366e-05, "loss": 0.7062652111053467, "step": 2061 }, { "epoch": 0.7266960352422908, "grad_norm": 1.666793884988277, "learning_rate": 1.5102244797686049e-05, "loss": 0.6290205717086792, "step": 2062 }, { "epoch": 0.7270484581497797, "grad_norm": 1.7111515682842917, "learning_rate": 1.5097232991058409e-05, "loss": 0.727097749710083, "step": 2063 }, { "epoch": 0.7274008810572687, "grad_norm": 1.6005396217530683, "learning_rate": 1.5092219454078803e-05, "loss": 0.783380389213562, "step": 2064 }, { "epoch": 0.7277533039647577, "grad_norm": 1.4872748126751951, "learning_rate": 1.5087204188449165e-05, "loss": 0.6190629601478577, "step": 2065 }, { "epoch": 0.7281057268722467, "grad_norm": 1.5426042958975894, "learning_rate": 1.5082187195872026e-05, "loss": 0.6749798059463501, "step": 2066 }, { "epoch": 0.7284581497797357, "grad_norm": 1.524694880675492, "learning_rate": 1.5077168478050494e-05, "loss": 0.6581153273582458, "step": 2067 }, { "epoch": 0.7288105726872247, "grad_norm": 1.433767292714838, "learning_rate": 1.5072148036688279e-05, "loss": 0.6886252760887146, "step": 2068 }, { "epoch": 0.7291629955947136, "grad_norm": 1.651630016781231, "learning_rate": 1.506712587348965e-05, "loss": 0.6893814206123352, "step": 2069 }, { "epoch": 0.7295154185022027, "grad_norm": 1.7840073958291343, "learning_rate": 1.5062101990159486e-05, "loss": 0.8242654800415039, "step": 2070 }, { "epoch": 0.7298678414096916, "grad_norm": 1.4785860236042563, "learning_rate": 1.5057076388403229e-05, "loss": 0.6331228017807007, "step": 2071 }, { "epoch": 0.7302202643171806, "grad_norm": 1.999658994203056, "learning_rate": 1.5052049069926927e-05, "loss": 0.6440649032592773, "step": 2072 }, { "epoch": 0.7305726872246696, "grad_norm": 1.4709264297577982, "learning_rate": 1.5047020036437187e-05, "loss": 0.7575498819351196, "step": 2073 }, { "epoch": 0.7309251101321586, "grad_norm": 1.8032604054381702, "learning_rate": 1.5041989289641215e-05, "loss": 0.7530438899993896, "step": 2074 }, { "epoch": 0.7312775330396476, "grad_norm": 1.5344556457224068, "learning_rate": 1.5036956831246792e-05, "loss": 0.6035616397857666, "step": 2075 }, { "epoch": 0.7316299559471365, "grad_norm": 1.5603807233808964, "learning_rate": 1.5031922662962279e-05, "loss": 0.8199492692947388, "step": 2076 }, { "epoch": 0.7319823788546256, "grad_norm": 1.4221584765379676, "learning_rate": 1.5026886786496624e-05, "loss": 0.7700716257095337, "step": 2077 }, { "epoch": 0.7323348017621145, "grad_norm": 1.363028657258907, "learning_rate": 1.5021849203559347e-05, "loss": 0.6147816777229309, "step": 2078 }, { "epoch": 0.7326872246696036, "grad_norm": 1.5628142146943151, "learning_rate": 1.5016809915860549e-05, "loss": 0.6841654777526855, "step": 2079 }, { "epoch": 0.7330396475770925, "grad_norm": 1.7910877668379601, "learning_rate": 1.5011768925110915e-05, "loss": 0.7212510108947754, "step": 2080 }, { "epoch": 0.7333920704845815, "grad_norm": 1.5222211216380177, "learning_rate": 1.5006726233021702e-05, "loss": 0.6695969104766846, "step": 2081 }, { "epoch": 0.7337444933920705, "grad_norm": 1.391558192885713, "learning_rate": 1.500168184130475e-05, "loss": 0.5991939306259155, "step": 2082 }, { "epoch": 0.7340969162995594, "grad_norm": 1.4191544168706896, "learning_rate": 1.4996635751672467e-05, "loss": 0.7127671241760254, "step": 2083 }, { "epoch": 0.7344493392070485, "grad_norm": 1.6905086418980109, "learning_rate": 1.4991587965837853e-05, "loss": 0.6874737739562988, "step": 2084 }, { "epoch": 0.7348017621145374, "grad_norm": 1.3584519480933235, "learning_rate": 1.4986538485514466e-05, "loss": 0.6695086359977722, "step": 2085 }, { "epoch": 0.7351541850220265, "grad_norm": 1.694264564137899, "learning_rate": 1.4981487312416452e-05, "loss": 0.8366880416870117, "step": 2086 }, { "epoch": 0.7355066079295154, "grad_norm": 1.4589826786561007, "learning_rate": 1.4976434448258519e-05, "loss": 0.6448042988777161, "step": 2087 }, { "epoch": 0.7358590308370044, "grad_norm": 1.8583566766216881, "learning_rate": 1.4971379894755969e-05, "loss": 0.7015181183815002, "step": 2088 }, { "epoch": 0.7362114537444934, "grad_norm": 1.702091122213854, "learning_rate": 1.4966323653624657e-05, "loss": 0.6842815279960632, "step": 2089 }, { "epoch": 0.7365638766519824, "grad_norm": 1.7134163669939546, "learning_rate": 1.4961265726581025e-05, "loss": 0.6866877675056458, "step": 2090 }, { "epoch": 0.7369162995594714, "grad_norm": 1.537334961209543, "learning_rate": 1.4956206115342076e-05, "loss": 0.5486865043640137, "step": 2091 }, { "epoch": 0.7372687224669604, "grad_norm": 1.7196744065626985, "learning_rate": 1.4951144821625396e-05, "loss": 0.7241986989974976, "step": 2092 }, { "epoch": 0.7376211453744493, "grad_norm": 1.647893211532232, "learning_rate": 1.4946081847149134e-05, "loss": 0.8400537967681885, "step": 2093 }, { "epoch": 0.7379735682819383, "grad_norm": 2.2262132208657146, "learning_rate": 1.4941017193632013e-05, "loss": 0.6050147414207458, "step": 2094 }, { "epoch": 0.7383259911894273, "grad_norm": 1.337421477916073, "learning_rate": 1.4935950862793322e-05, "loss": 0.6744229197502136, "step": 2095 }, { "epoch": 0.7386784140969163, "grad_norm": 1.4345512538147223, "learning_rate": 1.493088285635293e-05, "loss": 0.6902294158935547, "step": 2096 }, { "epoch": 0.7390308370044053, "grad_norm": 1.8712136012401615, "learning_rate": 1.492581317603126e-05, "loss": 0.6328809261322021, "step": 2097 }, { "epoch": 0.7393832599118942, "grad_norm": 1.4287618993627116, "learning_rate": 1.4920741823549316e-05, "loss": 0.5740914344787598, "step": 2098 }, { "epoch": 0.7397356828193833, "grad_norm": 2.181624869430245, "learning_rate": 1.491566880062866e-05, "loss": 0.676064133644104, "step": 2099 }, { "epoch": 0.7400881057268722, "grad_norm": 1.5152586818427025, "learning_rate": 1.4910594108991427e-05, "loss": 0.655153751373291, "step": 2100 }, { "epoch": 0.7404405286343613, "grad_norm": 1.7534591753196083, "learning_rate": 1.4905517750360321e-05, "loss": 0.7406177520751953, "step": 2101 }, { "epoch": 0.7407929515418502, "grad_norm": 1.777307095945404, "learning_rate": 1.4900439726458602e-05, "loss": 0.6568606495857239, "step": 2102 }, { "epoch": 0.7411453744493393, "grad_norm": 1.661203262476052, "learning_rate": 1.4895360039010101e-05, "loss": 0.8073545098304749, "step": 2103 }, { "epoch": 0.7414977973568282, "grad_norm": 1.6727123321226325, "learning_rate": 1.4890278689739219e-05, "loss": 0.6350502967834473, "step": 2104 }, { "epoch": 0.7418502202643171, "grad_norm": 1.475293376760879, "learning_rate": 1.4885195680370915e-05, "loss": 0.6419750452041626, "step": 2105 }, { "epoch": 0.7422026431718062, "grad_norm": 1.5480091112446772, "learning_rate": 1.4880111012630706e-05, "loss": 0.72661292552948, "step": 2106 }, { "epoch": 0.7425550660792951, "grad_norm": 1.5125479406066336, "learning_rate": 1.4875024688244683e-05, "loss": 0.6996778845787048, "step": 2107 }, { "epoch": 0.7429074889867842, "grad_norm": 1.7343888178448454, "learning_rate": 1.4869936708939497e-05, "loss": 0.8383389711380005, "step": 2108 }, { "epoch": 0.7432599118942731, "grad_norm": 1.6950461405964057, "learning_rate": 1.4864847076442358e-05, "loss": 0.6863676905632019, "step": 2109 }, { "epoch": 0.7436123348017621, "grad_norm": 1.781136801701718, "learning_rate": 1.4859755792481032e-05, "loss": 0.8493780493736267, "step": 2110 }, { "epoch": 0.7439647577092511, "grad_norm": 1.3754571175527768, "learning_rate": 1.4854662858783857e-05, "loss": 0.6172446012496948, "step": 2111 }, { "epoch": 0.7443171806167401, "grad_norm": 6.860121931549926, "learning_rate": 1.4849568277079724e-05, "loss": 0.8390353918075562, "step": 2112 }, { "epoch": 0.7446696035242291, "grad_norm": 1.8563178731324264, "learning_rate": 1.4844472049098087e-05, "loss": 0.7108968496322632, "step": 2113 }, { "epoch": 0.7450220264317181, "grad_norm": 1.5680406370173388, "learning_rate": 1.4839374176568956e-05, "loss": 0.7322912812232971, "step": 2114 }, { "epoch": 0.745374449339207, "grad_norm": 1.5999840343791083, "learning_rate": 1.4834274661222896e-05, "loss": 0.6371238231658936, "step": 2115 }, { "epoch": 0.745726872246696, "grad_norm": 1.6793360349519253, "learning_rate": 1.4829173504791035e-05, "loss": 0.8346511125564575, "step": 2116 }, { "epoch": 0.746079295154185, "grad_norm": 1.5530745059154032, "learning_rate": 1.4824070709005063e-05, "loss": 0.5893645286560059, "step": 2117 }, { "epoch": 0.746431718061674, "grad_norm": 1.298803943907695, "learning_rate": 1.4818966275597213e-05, "loss": 0.60541832447052, "step": 2118 }, { "epoch": 0.746784140969163, "grad_norm": 2.0046684565684108, "learning_rate": 1.4813860206300286e-05, "loss": 0.5823955535888672, "step": 2119 }, { "epoch": 0.747136563876652, "grad_norm": 1.8094924676670123, "learning_rate": 1.480875250284763e-05, "loss": 0.6751007437705994, "step": 2120 }, { "epoch": 0.747488986784141, "grad_norm": 1.5760168475146599, "learning_rate": 1.4803643166973155e-05, "loss": 0.6878843307495117, "step": 2121 }, { "epoch": 0.7478414096916299, "grad_norm": 1.4061876649605263, "learning_rate": 1.4798532200411319e-05, "loss": 0.6732173562049866, "step": 2122 }, { "epoch": 0.748193832599119, "grad_norm": 1.558565097379613, "learning_rate": 1.479341960489714e-05, "loss": 0.6383658647537231, "step": 2123 }, { "epoch": 0.7485462555066079, "grad_norm": 1.8120908321553708, "learning_rate": 1.4788305382166174e-05, "loss": 0.7444638013839722, "step": 2124 }, { "epoch": 0.748898678414097, "grad_norm": 1.7437949253948153, "learning_rate": 1.4783189533954555e-05, "loss": 0.5492427349090576, "step": 2125 }, { "epoch": 0.7492511013215859, "grad_norm": 1.60343309806789, "learning_rate": 1.4778072061998944e-05, "loss": 0.6193333864212036, "step": 2126 }, { "epoch": 0.7496035242290748, "grad_norm": 2.019729643045431, "learning_rate": 1.4772952968036572e-05, "loss": 0.853213906288147, "step": 2127 }, { "epoch": 0.7499559471365639, "grad_norm": 1.4306248677016198, "learning_rate": 1.4767832253805203e-05, "loss": 0.6128672361373901, "step": 2128 }, { "epoch": 0.7503083700440528, "grad_norm": 1.7550432779472305, "learning_rate": 1.4762709921043166e-05, "loss": 0.7298723459243774, "step": 2129 }, { "epoch": 0.7506607929515419, "grad_norm": 1.3773404123246435, "learning_rate": 1.475758597148933e-05, "loss": 0.6578782796859741, "step": 2130 }, { "epoch": 0.7510132158590308, "grad_norm": 1.6603784675007325, "learning_rate": 1.4752460406883122e-05, "loss": 0.6490681171417236, "step": 2131 }, { "epoch": 0.7513656387665198, "grad_norm": 1.530112138397779, "learning_rate": 1.4747333228964502e-05, "loss": 0.657980740070343, "step": 2132 }, { "epoch": 0.7517180616740088, "grad_norm": 1.9937499661396574, "learning_rate": 1.4742204439473999e-05, "loss": 0.8431578874588013, "step": 2133 }, { "epoch": 0.7520704845814978, "grad_norm": 1.7351787739786175, "learning_rate": 1.4737074040152667e-05, "loss": 0.7217377424240112, "step": 2134 }, { "epoch": 0.7524229074889868, "grad_norm": 2.232953474209366, "learning_rate": 1.4731942032742127e-05, "loss": 0.6299912333488464, "step": 2135 }, { "epoch": 0.7527753303964758, "grad_norm": 1.6053563211063129, "learning_rate": 1.4726808418984527e-05, "loss": 0.6325603723526001, "step": 2136 }, { "epoch": 0.7531277533039648, "grad_norm": 1.7427287871247603, "learning_rate": 1.4721673200622572e-05, "loss": 0.6785098314285278, "step": 2137 }, { "epoch": 0.7534801762114537, "grad_norm": 2.5780020778792068, "learning_rate": 1.471653637939951e-05, "loss": 0.7311918139457703, "step": 2138 }, { "epoch": 0.7538325991189427, "grad_norm": 1.498799685922224, "learning_rate": 1.4711397957059132e-05, "loss": 0.7117096781730652, "step": 2139 }, { "epoch": 0.7541850220264317, "grad_norm": 1.4519847744536865, "learning_rate": 1.4706257935345772e-05, "loss": 0.6709408760070801, "step": 2140 }, { "epoch": 0.7545374449339207, "grad_norm": 1.9629689982019365, "learning_rate": 1.4701116316004307e-05, "loss": 0.6478008031845093, "step": 2141 }, { "epoch": 0.7548898678414097, "grad_norm": 1.5362345610055923, "learning_rate": 1.4695973100780154e-05, "loss": 0.6414140462875366, "step": 2142 }, { "epoch": 0.7552422907488987, "grad_norm": 1.7088547501964069, "learning_rate": 1.4690828291419283e-05, "loss": 0.6947815418243408, "step": 2143 }, { "epoch": 0.7555947136563876, "grad_norm": 1.6244554419934112, "learning_rate": 1.4685681889668187e-05, "loss": 0.6614837646484375, "step": 2144 }, { "epoch": 0.7559471365638767, "grad_norm": 1.87010430937903, "learning_rate": 1.4680533897273913e-05, "loss": 0.7803678512573242, "step": 2145 }, { "epoch": 0.7562995594713656, "grad_norm": 1.975192105020327, "learning_rate": 1.4675384315984045e-05, "loss": 0.8411567211151123, "step": 2146 }, { "epoch": 0.7566519823788547, "grad_norm": 2.4329758477488177, "learning_rate": 1.4670233147546708e-05, "loss": 0.8379243016242981, "step": 2147 }, { "epoch": 0.7570044052863436, "grad_norm": 1.6153137773652926, "learning_rate": 1.4665080393710558e-05, "loss": 0.6419194936752319, "step": 2148 }, { "epoch": 0.7573568281938327, "grad_norm": 1.8383077301350303, "learning_rate": 1.4659926056224798e-05, "loss": 0.7791979908943176, "step": 2149 }, { "epoch": 0.7577092511013216, "grad_norm": 1.72203201226436, "learning_rate": 1.465477013683916e-05, "loss": 0.7237389087677002, "step": 2150 }, { "epoch": 0.7580616740088105, "grad_norm": 1.5129431088418641, "learning_rate": 1.464961263730393e-05, "loss": 0.6750755906105042, "step": 2151 }, { "epoch": 0.7584140969162996, "grad_norm": 1.3799525283393634, "learning_rate": 1.4644453559369904e-05, "loss": 0.5412150621414185, "step": 2152 }, { "epoch": 0.7587665198237885, "grad_norm": 1.7752121571388841, "learning_rate": 1.463929290478844e-05, "loss": 0.7009850740432739, "step": 2153 }, { "epoch": 0.7591189427312776, "grad_norm": 1.5166585489574307, "learning_rate": 1.4634130675311411e-05, "loss": 0.8678998351097107, "step": 2154 }, { "epoch": 0.7594713656387665, "grad_norm": 2.0127463717616347, "learning_rate": 1.4628966872691241e-05, "loss": 0.7395705580711365, "step": 2155 }, { "epoch": 0.7598237885462555, "grad_norm": 1.5739842401493016, "learning_rate": 1.4623801498680875e-05, "loss": 0.5950812101364136, "step": 2156 }, { "epoch": 0.7601762114537445, "grad_norm": 1.6474041176538503, "learning_rate": 1.46186345550338e-05, "loss": 0.7133630514144897, "step": 2157 }, { "epoch": 0.7605286343612335, "grad_norm": 1.4644647660974064, "learning_rate": 1.4613466043504026e-05, "loss": 0.7551965117454529, "step": 2158 }, { "epoch": 0.7608810572687225, "grad_norm": 1.4284086636489846, "learning_rate": 1.4608295965846111e-05, "loss": 0.6654022932052612, "step": 2159 }, { "epoch": 0.7612334801762115, "grad_norm": 3.5518990487711126, "learning_rate": 1.460312432381513e-05, "loss": 0.8081967830657959, "step": 2160 }, { "epoch": 0.7615859030837004, "grad_norm": 1.8113760087057564, "learning_rate": 1.4597951119166696e-05, "loss": 0.7478348016738892, "step": 2161 }, { "epoch": 0.7619383259911894, "grad_norm": 2.9384500423152833, "learning_rate": 1.4592776353656948e-05, "loss": 0.7866748571395874, "step": 2162 }, { "epoch": 0.7622907488986784, "grad_norm": 1.4185631764668494, "learning_rate": 1.4587600029042563e-05, "loss": 0.6675869226455688, "step": 2163 }, { "epoch": 0.7626431718061674, "grad_norm": 1.934904377243222, "learning_rate": 1.4582422147080739e-05, "loss": 0.6881103515625, "step": 2164 }, { "epoch": 0.7629955947136564, "grad_norm": 1.6886719056667128, "learning_rate": 1.457724270952921e-05, "loss": 0.7298593521118164, "step": 2165 }, { "epoch": 0.7633480176211453, "grad_norm": 1.5123877451607526, "learning_rate": 1.4572061718146224e-05, "loss": 0.7102776765823364, "step": 2166 }, { "epoch": 0.7637004405286344, "grad_norm": 1.6706836844885837, "learning_rate": 1.4566879174690576e-05, "loss": 0.7767213582992554, "step": 2167 }, { "epoch": 0.7640528634361233, "grad_norm": 1.4702267439170456, "learning_rate": 1.4561695080921573e-05, "loss": 0.7480257749557495, "step": 2168 }, { "epoch": 0.7644052863436124, "grad_norm": 1.4326376726611632, "learning_rate": 1.4556509438599057e-05, "loss": 0.7419564723968506, "step": 2169 }, { "epoch": 0.7647577092511013, "grad_norm": 1.4787079836022163, "learning_rate": 1.4551322249483388e-05, "loss": 0.6820264458656311, "step": 2170 }, { "epoch": 0.7651101321585904, "grad_norm": 1.3819947250134947, "learning_rate": 1.4546133515335462e-05, "loss": 0.5947732329368591, "step": 2171 }, { "epoch": 0.7654625550660793, "grad_norm": 1.6478975280830812, "learning_rate": 1.4540943237916685e-05, "loss": 0.6772021055221558, "step": 2172 }, { "epoch": 0.7658149779735682, "grad_norm": 1.7643629263201115, "learning_rate": 1.4535751418989e-05, "loss": 0.7822210192680359, "step": 2173 }, { "epoch": 0.7661674008810573, "grad_norm": 1.6079996302057808, "learning_rate": 1.4530558060314866e-05, "loss": 0.6208021640777588, "step": 2174 }, { "epoch": 0.7665198237885462, "grad_norm": 1.5681481752797541, "learning_rate": 1.4525363163657264e-05, "loss": 0.8017063140869141, "step": 2175 }, { "epoch": 0.7668722466960353, "grad_norm": 1.4681783580715917, "learning_rate": 1.4520166730779704e-05, "loss": 0.738383948802948, "step": 2176 }, { "epoch": 0.7672246696035242, "grad_norm": 1.742058488341915, "learning_rate": 1.4514968763446213e-05, "loss": 0.7698314785957336, "step": 2177 }, { "epoch": 0.7675770925110132, "grad_norm": 1.7037031257568012, "learning_rate": 1.4509769263421337e-05, "loss": 0.789836049079895, "step": 2178 }, { "epoch": 0.7679295154185022, "grad_norm": 1.8506345351591968, "learning_rate": 1.4504568232470145e-05, "loss": 0.6437339782714844, "step": 2179 }, { "epoch": 0.7682819383259912, "grad_norm": 2.04999468198658, "learning_rate": 1.4499365672358226e-05, "loss": 0.6684735417366028, "step": 2180 }, { "epoch": 0.7686343612334802, "grad_norm": 1.5077038126146909, "learning_rate": 1.4494161584851687e-05, "loss": 0.6577454805374146, "step": 2181 }, { "epoch": 0.7689867841409692, "grad_norm": 1.3277471323795764, "learning_rate": 1.4488955971717154e-05, "loss": 0.5975776314735413, "step": 2182 }, { "epoch": 0.7693392070484582, "grad_norm": 1.8819815707164231, "learning_rate": 1.4483748834721767e-05, "loss": 0.6385577917098999, "step": 2183 }, { "epoch": 0.7696916299559471, "grad_norm": 1.4452778349053288, "learning_rate": 1.4478540175633193e-05, "loss": 0.6295928955078125, "step": 2184 }, { "epoch": 0.7700440528634361, "grad_norm": 1.5790897154124113, "learning_rate": 1.4473329996219605e-05, "loss": 0.6848496198654175, "step": 2185 }, { "epoch": 0.7703964757709251, "grad_norm": 1.410283277756768, "learning_rate": 1.44681182982497e-05, "loss": 0.6476501226425171, "step": 2186 }, { "epoch": 0.7707488986784141, "grad_norm": 1.5220085975801703, "learning_rate": 1.4462905083492683e-05, "loss": 0.750103235244751, "step": 2187 }, { "epoch": 0.771101321585903, "grad_norm": 1.3838063845924222, "learning_rate": 1.4457690353718285e-05, "loss": 0.668454110622406, "step": 2188 }, { "epoch": 0.7714537444933921, "grad_norm": 1.3695000422583874, "learning_rate": 1.4452474110696738e-05, "loss": 0.6671048402786255, "step": 2189 }, { "epoch": 0.771806167400881, "grad_norm": 1.404147919130693, "learning_rate": 1.4447256356198797e-05, "loss": 0.6261379718780518, "step": 2190 }, { "epoch": 0.7721585903083701, "grad_norm": 1.6192228095415668, "learning_rate": 1.4442037091995726e-05, "loss": 0.6128308176994324, "step": 2191 }, { "epoch": 0.772511013215859, "grad_norm": 1.629684954387357, "learning_rate": 1.4436816319859306e-05, "loss": 0.7709108591079712, "step": 2192 }, { "epoch": 0.7728634361233481, "grad_norm": 1.7604991326643686, "learning_rate": 1.4431594041561822e-05, "loss": 0.6242028474807739, "step": 2193 }, { "epoch": 0.773215859030837, "grad_norm": 1.7562103574700596, "learning_rate": 1.4426370258876079e-05, "loss": 0.8030718564987183, "step": 2194 }, { "epoch": 0.7735682819383259, "grad_norm": 1.5182882363444798, "learning_rate": 1.4421144973575386e-05, "loss": 0.7785710692405701, "step": 2195 }, { "epoch": 0.773920704845815, "grad_norm": 1.5453752656669346, "learning_rate": 1.4415918187433564e-05, "loss": 0.6846014857292175, "step": 2196 }, { "epoch": 0.7742731277533039, "grad_norm": 1.6007643935951585, "learning_rate": 1.4410689902224947e-05, "loss": 0.7883827686309814, "step": 2197 }, { "epoch": 0.774625550660793, "grad_norm": 2.0453745328196065, "learning_rate": 1.4405460119724377e-05, "loss": 0.8285650610923767, "step": 2198 }, { "epoch": 0.7749779735682819, "grad_norm": 1.5026043059194256, "learning_rate": 1.4400228841707193e-05, "loss": 0.6101093292236328, "step": 2199 }, { "epoch": 0.775330396475771, "grad_norm": 1.4888885445589903, "learning_rate": 1.4394996069949262e-05, "loss": 0.6627891063690186, "step": 2200 }, { "epoch": 0.7756828193832599, "grad_norm": 1.4487650646569075, "learning_rate": 1.4389761806226943e-05, "loss": 0.6755822896957397, "step": 2201 }, { "epoch": 0.7760352422907489, "grad_norm": 1.438634659048083, "learning_rate": 1.4384526052317106e-05, "loss": 0.6718465089797974, "step": 2202 }, { "epoch": 0.7763876651982379, "grad_norm": 1.4171659147035778, "learning_rate": 1.4379288809997121e-05, "loss": 0.5857758522033691, "step": 2203 }, { "epoch": 0.7767400881057269, "grad_norm": 1.1200186604200135, "learning_rate": 1.4374050081044876e-05, "loss": 0.5861783027648926, "step": 2204 }, { "epoch": 0.7770925110132159, "grad_norm": 1.442532656158601, "learning_rate": 1.4368809867238754e-05, "loss": 0.6862374544143677, "step": 2205 }, { "epoch": 0.7774449339207048, "grad_norm": 1.6455201954220524, "learning_rate": 1.4363568170357646e-05, "loss": 0.6787701845169067, "step": 2206 }, { "epoch": 0.7777973568281938, "grad_norm": 1.4101038203667695, "learning_rate": 1.435832499218094e-05, "loss": 0.5671687126159668, "step": 2207 }, { "epoch": 0.7781497797356828, "grad_norm": 1.5479554264257531, "learning_rate": 1.435308033448854e-05, "loss": 0.8243429064750671, "step": 2208 }, { "epoch": 0.7785022026431718, "grad_norm": 1.3676716888852272, "learning_rate": 1.4347834199060835e-05, "loss": 0.5880655646324158, "step": 2209 }, { "epoch": 0.7788546255506608, "grad_norm": 2.451624357800272, "learning_rate": 1.4342586587678734e-05, "loss": 0.7085679769515991, "step": 2210 }, { "epoch": 0.7792070484581498, "grad_norm": 1.546990179750224, "learning_rate": 1.4337337502123627e-05, "loss": 0.7011853456497192, "step": 2211 }, { "epoch": 0.7795594713656387, "grad_norm": 1.6003260447933962, "learning_rate": 1.4332086944177426e-05, "loss": 0.755327582359314, "step": 2212 }, { "epoch": 0.7799118942731278, "grad_norm": 1.3917359947430683, "learning_rate": 1.4326834915622522e-05, "loss": 0.7152736186981201, "step": 2213 }, { "epoch": 0.7802643171806167, "grad_norm": 1.3821995576878587, "learning_rate": 1.4321581418241825e-05, "loss": 0.6744083166122437, "step": 2214 }, { "epoch": 0.7806167400881058, "grad_norm": 1.5294456027931242, "learning_rate": 1.4316326453818728e-05, "loss": 0.6112288236618042, "step": 2215 }, { "epoch": 0.7809691629955947, "grad_norm": 1.2620758120071194, "learning_rate": 1.4311070024137128e-05, "loss": 0.5569246411323547, "step": 2216 }, { "epoch": 0.7813215859030836, "grad_norm": 1.474883531826743, "learning_rate": 1.4305812130981418e-05, "loss": 0.6214494705200195, "step": 2217 }, { "epoch": 0.7816740088105727, "grad_norm": 1.4094788075709526, "learning_rate": 1.4300552776136497e-05, "loss": 0.5401003956794739, "step": 2218 }, { "epoch": 0.7820264317180616, "grad_norm": 1.433294268920241, "learning_rate": 1.4295291961387742e-05, "loss": 0.5128720998764038, "step": 2219 }, { "epoch": 0.7823788546255507, "grad_norm": 1.352265751544302, "learning_rate": 1.4290029688521043e-05, "loss": 0.5495916604995728, "step": 2220 }, { "epoch": 0.7827312775330396, "grad_norm": 1.6131865642068703, "learning_rate": 1.4284765959322772e-05, "loss": 0.628544807434082, "step": 2221 }, { "epoch": 0.7830837004405287, "grad_norm": 1.443784571277232, "learning_rate": 1.427950077557981e-05, "loss": 0.7171294689178467, "step": 2222 }, { "epoch": 0.7834361233480176, "grad_norm": 1.3723589201513293, "learning_rate": 1.4274234139079513e-05, "loss": 0.7436389327049255, "step": 2223 }, { "epoch": 0.7837885462555066, "grad_norm": 1.5295286402885273, "learning_rate": 1.426896605160975e-05, "loss": 0.7154244780540466, "step": 2224 }, { "epoch": 0.7841409691629956, "grad_norm": 1.4385555847293963, "learning_rate": 1.426369651495886e-05, "loss": 0.6433268189430237, "step": 2225 }, { "epoch": 0.7844933920704846, "grad_norm": 1.4177681718218336, "learning_rate": 1.4258425530915703e-05, "loss": 0.6612321734428406, "step": 2226 }, { "epoch": 0.7848458149779736, "grad_norm": 1.962010974229914, "learning_rate": 1.42531531012696e-05, "loss": 0.6384811401367188, "step": 2227 }, { "epoch": 0.7851982378854625, "grad_norm": 1.4927220821701634, "learning_rate": 1.4247879227810384e-05, "loss": 0.5592762231826782, "step": 2228 }, { "epoch": 0.7855506607929515, "grad_norm": 1.6376570609433725, "learning_rate": 1.4242603912328367e-05, "loss": 0.6904512643814087, "step": 2229 }, { "epoch": 0.7859030837004405, "grad_norm": 1.7784965930873091, "learning_rate": 1.4237327156614358e-05, "loss": 0.7165266871452332, "step": 2230 }, { "epoch": 0.7862555066079295, "grad_norm": 1.6275397333714936, "learning_rate": 1.423204896245965e-05, "loss": 0.8567172288894653, "step": 2231 }, { "epoch": 0.7866079295154185, "grad_norm": 1.6554990252792119, "learning_rate": 1.4226769331656028e-05, "loss": 0.6595934629440308, "step": 2232 }, { "epoch": 0.7869603524229075, "grad_norm": 1.8034278962736743, "learning_rate": 1.4221488265995755e-05, "loss": 0.750861644744873, "step": 2233 }, { "epoch": 0.7873127753303965, "grad_norm": 1.3674194021669617, "learning_rate": 1.4216205767271597e-05, "loss": 0.7146387696266174, "step": 2234 }, { "epoch": 0.7876651982378855, "grad_norm": 1.9347692502503655, "learning_rate": 1.4210921837276792e-05, "loss": 0.58647221326828, "step": 2235 }, { "epoch": 0.7880176211453744, "grad_norm": 1.4888974250205094, "learning_rate": 1.4205636477805072e-05, "loss": 0.6893318891525269, "step": 2236 }, { "epoch": 0.7883700440528635, "grad_norm": 1.1833417050311776, "learning_rate": 1.4200349690650654e-05, "loss": 0.5545464158058167, "step": 2237 }, { "epoch": 0.7887224669603524, "grad_norm": 1.6014523598259138, "learning_rate": 1.4195061477608234e-05, "loss": 0.6088600158691406, "step": 2238 }, { "epoch": 0.7890748898678414, "grad_norm": 1.3513904877886467, "learning_rate": 1.4189771840472997e-05, "loss": 0.6330769658088684, "step": 2239 }, { "epoch": 0.7894273127753304, "grad_norm": 1.4283770062393895, "learning_rate": 1.4184480781040613e-05, "loss": 0.678654670715332, "step": 2240 }, { "epoch": 0.7897797356828193, "grad_norm": 1.445633946040222, "learning_rate": 1.417918830110723e-05, "loss": 0.6259177923202515, "step": 2241 }, { "epoch": 0.7901321585903084, "grad_norm": 1.408151849302333, "learning_rate": 1.4173894402469477e-05, "loss": 0.634982168674469, "step": 2242 }, { "epoch": 0.7904845814977973, "grad_norm": 1.37778450193705, "learning_rate": 1.4168599086924473e-05, "loss": 0.6610612869262695, "step": 2243 }, { "epoch": 0.7908370044052864, "grad_norm": 1.386127288755765, "learning_rate": 1.416330235626981e-05, "loss": 0.6952961683273315, "step": 2244 }, { "epoch": 0.7911894273127753, "grad_norm": 1.6165363001234343, "learning_rate": 1.4158004212303565e-05, "loss": 0.5055881142616272, "step": 2245 }, { "epoch": 0.7915418502202644, "grad_norm": 1.4841191669035856, "learning_rate": 1.4152704656824288e-05, "loss": 0.7284455299377441, "step": 2246 }, { "epoch": 0.7918942731277533, "grad_norm": 1.3583334859782668, "learning_rate": 1.414740369163102e-05, "loss": 0.6985108852386475, "step": 2247 }, { "epoch": 0.7922466960352423, "grad_norm": 1.3664811170856164, "learning_rate": 1.4142101318523271e-05, "loss": 0.5967550277709961, "step": 2248 }, { "epoch": 0.7925991189427313, "grad_norm": 1.5695298710984633, "learning_rate": 1.4136797539301033e-05, "loss": 0.7696695327758789, "step": 2249 }, { "epoch": 0.7929515418502202, "grad_norm": 1.3234775564665824, "learning_rate": 1.413149235576477e-05, "loss": 0.8131378293037415, "step": 2250 }, { "epoch": 0.7933039647577093, "grad_norm": 1.8429663529686, "learning_rate": 1.4126185769715428e-05, "loss": 0.8029932975769043, "step": 2251 }, { "epoch": 0.7936563876651982, "grad_norm": 1.720051288151631, "learning_rate": 1.412087778295443e-05, "loss": 0.7408573031425476, "step": 2252 }, { "epoch": 0.7940088105726872, "grad_norm": 1.8037723298533723, "learning_rate": 1.411556839728367e-05, "loss": 0.8624325394630432, "step": 2253 }, { "epoch": 0.7943612334801762, "grad_norm": 1.5291561523904078, "learning_rate": 1.411025761450552e-05, "loss": 0.7635384798049927, "step": 2254 }, { "epoch": 0.7947136563876652, "grad_norm": 1.5012301776005823, "learning_rate": 1.4104945436422832e-05, "loss": 0.5612920522689819, "step": 2255 }, { "epoch": 0.7950660792951542, "grad_norm": 1.5891725973137842, "learning_rate": 1.4099631864838912e-05, "loss": 0.5792248845100403, "step": 2256 }, { "epoch": 0.7954185022026432, "grad_norm": 1.427703140365858, "learning_rate": 1.4094316901557563e-05, "loss": 0.7405142188072205, "step": 2257 }, { "epoch": 0.7957709251101321, "grad_norm": 1.5302016454534209, "learning_rate": 1.4089000548383044e-05, "loss": 0.630780816078186, "step": 2258 }, { "epoch": 0.7961233480176212, "grad_norm": 1.5690685088460359, "learning_rate": 1.4083682807120092e-05, "loss": 0.6737201809883118, "step": 2259 }, { "epoch": 0.7964757709251101, "grad_norm": 4.158789316506426, "learning_rate": 1.4078363679573918e-05, "loss": 0.6469985842704773, "step": 2260 }, { "epoch": 0.7968281938325992, "grad_norm": 1.4774582614404035, "learning_rate": 1.4073043167550198e-05, "loss": 0.6315224170684814, "step": 2261 }, { "epoch": 0.7971806167400881, "grad_norm": 1.1766652256758812, "learning_rate": 1.4067721272855079e-05, "loss": 0.6785402297973633, "step": 2262 }, { "epoch": 0.797533039647577, "grad_norm": 1.4677269844033833, "learning_rate": 1.406239799729518e-05, "loss": 0.7131394147872925, "step": 2263 }, { "epoch": 0.7978854625550661, "grad_norm": 1.5575833651180606, "learning_rate": 1.405707334267759e-05, "loss": 0.6921142339706421, "step": 2264 }, { "epoch": 0.798237885462555, "grad_norm": 1.375694666198905, "learning_rate": 1.4051747310809863e-05, "loss": 0.695213794708252, "step": 2265 }, { "epoch": 0.7985903083700441, "grad_norm": 1.8529986724322307, "learning_rate": 1.4046419903500013e-05, "loss": 0.7081988453865051, "step": 2266 }, { "epoch": 0.798942731277533, "grad_norm": 1.4461573292928833, "learning_rate": 1.4041091122556539e-05, "loss": 0.6404637098312378, "step": 2267 }, { "epoch": 0.7992951541850221, "grad_norm": 1.3566691109367863, "learning_rate": 1.403576096978839e-05, "loss": 0.6404134631156921, "step": 2268 }, { "epoch": 0.799647577092511, "grad_norm": 1.5118859398886633, "learning_rate": 1.4030429447004992e-05, "loss": 0.7963751554489136, "step": 2269 }, { "epoch": 0.8, "grad_norm": 1.632997404115334, "learning_rate": 1.4025096556016224e-05, "loss": 0.6648174524307251, "step": 2270 }, { "epoch": 0.800352422907489, "grad_norm": 1.4103532345019565, "learning_rate": 1.4019762298632445e-05, "loss": 0.6661815047264099, "step": 2271 }, { "epoch": 0.800704845814978, "grad_norm": 1.7237738440956045, "learning_rate": 1.4014426676664462e-05, "loss": 0.6194477081298828, "step": 2272 }, { "epoch": 0.801057268722467, "grad_norm": 1.8457235726726873, "learning_rate": 1.400908969192356e-05, "loss": 0.6869276762008667, "step": 2273 }, { "epoch": 0.8014096916299559, "grad_norm": 1.7545140114513338, "learning_rate": 1.4003751346221472e-05, "loss": 0.7352420091629028, "step": 2274 }, { "epoch": 0.801762114537445, "grad_norm": 1.5994812918128933, "learning_rate": 1.3998411641370405e-05, "loss": 0.8212440609931946, "step": 2275 }, { "epoch": 0.8021145374449339, "grad_norm": 1.5868623288152288, "learning_rate": 1.3993070579183021e-05, "loss": 0.6897045969963074, "step": 2276 }, { "epoch": 0.8024669603524229, "grad_norm": 1.716974382638037, "learning_rate": 1.3987728161472442e-05, "loss": 0.8406906127929688, "step": 2277 }, { "epoch": 0.8028193832599119, "grad_norm": 1.6664794009014727, "learning_rate": 1.3982384390052257e-05, "loss": 0.6236976385116577, "step": 2278 }, { "epoch": 0.8031718061674009, "grad_norm": 1.7056031446043847, "learning_rate": 1.3977039266736508e-05, "loss": 0.8110965490341187, "step": 2279 }, { "epoch": 0.8035242290748899, "grad_norm": 1.6273998334271178, "learning_rate": 1.3971692793339697e-05, "loss": 0.635534405708313, "step": 2280 }, { "epoch": 0.8038766519823789, "grad_norm": 1.5382566365445476, "learning_rate": 1.3966344971676789e-05, "loss": 0.7806028127670288, "step": 2281 }, { "epoch": 0.8042290748898678, "grad_norm": 1.7131487498074927, "learning_rate": 1.3960995803563195e-05, "loss": 0.6635935306549072, "step": 2282 }, { "epoch": 0.8045814977973569, "grad_norm": 1.6068551029738092, "learning_rate": 1.39556452908148e-05, "loss": 0.6064634323120117, "step": 2283 }, { "epoch": 0.8049339207048458, "grad_norm": 1.7686604234656398, "learning_rate": 1.3950293435247933e-05, "loss": 0.760187029838562, "step": 2284 }, { "epoch": 0.8052863436123348, "grad_norm": 1.5333245954906318, "learning_rate": 1.3944940238679384e-05, "loss": 0.7004644274711609, "step": 2285 }, { "epoch": 0.8056387665198238, "grad_norm": 1.9274194313344672, "learning_rate": 1.393958570292639e-05, "loss": 0.7662780284881592, "step": 2286 }, { "epoch": 0.8059911894273127, "grad_norm": 1.3943181397787612, "learning_rate": 1.393422982980666e-05, "loss": 0.7939090132713318, "step": 2287 }, { "epoch": 0.8063436123348018, "grad_norm": 1.377559765071464, "learning_rate": 1.3928872621138337e-05, "loss": 0.7461861371994019, "step": 2288 }, { "epoch": 0.8066960352422907, "grad_norm": 1.4875661773009663, "learning_rate": 1.3923514078740032e-05, "loss": 0.5997019410133362, "step": 2289 }, { "epoch": 0.8070484581497798, "grad_norm": 1.5379009713311227, "learning_rate": 1.3918154204430801e-05, "loss": 0.5437384843826294, "step": 2290 }, { "epoch": 0.8074008810572687, "grad_norm": 1.8168415447512607, "learning_rate": 1.3912793000030154e-05, "loss": 0.7387127876281738, "step": 2291 }, { "epoch": 0.8077533039647578, "grad_norm": 1.305308107523337, "learning_rate": 1.3907430467358054e-05, "loss": 0.483035147190094, "step": 2292 }, { "epoch": 0.8081057268722467, "grad_norm": 1.3669144351401303, "learning_rate": 1.3902066608234919e-05, "loss": 0.6208503842353821, "step": 2293 }, { "epoch": 0.8084581497797357, "grad_norm": 1.7196168695476914, "learning_rate": 1.3896701424481603e-05, "loss": 0.6691559553146362, "step": 2294 }, { "epoch": 0.8088105726872247, "grad_norm": 1.6945751274550964, "learning_rate": 1.3891334917919422e-05, "loss": 0.8960802555084229, "step": 2295 }, { "epoch": 0.8091629955947136, "grad_norm": 1.7625732291329363, "learning_rate": 1.388596709037014e-05, "loss": 0.669715404510498, "step": 2296 }, { "epoch": 0.8095154185022027, "grad_norm": 1.4235891674683654, "learning_rate": 1.3880597943655972e-05, "loss": 0.7356190085411072, "step": 2297 }, { "epoch": 0.8098678414096916, "grad_norm": 1.6403595773987272, "learning_rate": 1.3875227479599565e-05, "loss": 0.9158750176429749, "step": 2298 }, { "epoch": 0.8102202643171806, "grad_norm": 1.718215094287951, "learning_rate": 1.3869855700024031e-05, "loss": 0.7395786643028259, "step": 2299 }, { "epoch": 0.8105726872246696, "grad_norm": 1.6360185397225708, "learning_rate": 1.3864482606752922e-05, "loss": 0.594106912612915, "step": 2300 }, { "epoch": 0.8109251101321586, "grad_norm": 1.6395747499474045, "learning_rate": 1.3859108201610236e-05, "loss": 0.7853089570999146, "step": 2301 }, { "epoch": 0.8112775330396476, "grad_norm": 1.6313227134249062, "learning_rate": 1.3853732486420413e-05, "loss": 0.8346991539001465, "step": 2302 }, { "epoch": 0.8116299559471366, "grad_norm": 1.6254363131857819, "learning_rate": 1.3848355463008344e-05, "loss": 0.5493819117546082, "step": 2303 }, { "epoch": 0.8119823788546255, "grad_norm": 1.566621350016491, "learning_rate": 1.3842977133199363e-05, "loss": 0.7474828958511353, "step": 2304 }, { "epoch": 0.8123348017621146, "grad_norm": 1.6648296076023164, "learning_rate": 1.3837597498819242e-05, "loss": 0.6599621772766113, "step": 2305 }, { "epoch": 0.8126872246696035, "grad_norm": 1.5217466732352583, "learning_rate": 1.38322165616942e-05, "loss": 0.6751214861869812, "step": 2306 }, { "epoch": 0.8130396475770925, "grad_norm": 1.720054765999457, "learning_rate": 1.3826834323650899e-05, "loss": 0.7450453042984009, "step": 2307 }, { "epoch": 0.8133920704845815, "grad_norm": 1.4739637914592345, "learning_rate": 1.382145078651644e-05, "loss": 0.7015345692634583, "step": 2308 }, { "epoch": 0.8137444933920704, "grad_norm": 1.4921910425897076, "learning_rate": 1.3816065952118368e-05, "loss": 0.7161329984664917, "step": 2309 }, { "epoch": 0.8140969162995595, "grad_norm": 1.576440929020717, "learning_rate": 1.3810679822284665e-05, "loss": 0.771783709526062, "step": 2310 }, { "epoch": 0.8144493392070484, "grad_norm": 1.461165164266228, "learning_rate": 1.3805292398843755e-05, "loss": 0.6710794568061829, "step": 2311 }, { "epoch": 0.8148017621145375, "grad_norm": 1.6256312715940777, "learning_rate": 1.3799903683624503e-05, "loss": 0.6614924669265747, "step": 2312 }, { "epoch": 0.8151541850220264, "grad_norm": 1.429649360127197, "learning_rate": 1.3794513678456203e-05, "loss": 0.6432225704193115, "step": 2313 }, { "epoch": 0.8155066079295155, "grad_norm": 1.233784916709085, "learning_rate": 1.3789122385168604e-05, "loss": 0.6228311061859131, "step": 2314 }, { "epoch": 0.8158590308370044, "grad_norm": 1.5182036065920572, "learning_rate": 1.3783729805591875e-05, "loss": 0.5597498416900635, "step": 2315 }, { "epoch": 0.8162114537444934, "grad_norm": 1.954667780900904, "learning_rate": 1.3778335941556629e-05, "loss": 0.7651177048683167, "step": 2316 }, { "epoch": 0.8165638766519824, "grad_norm": 1.3053642347729657, "learning_rate": 1.3772940794893916e-05, "loss": 0.5482406616210938, "step": 2317 }, { "epoch": 0.8169162995594713, "grad_norm": 1.4432389735878668, "learning_rate": 1.3767544367435229e-05, "loss": 0.767236590385437, "step": 2318 }, { "epoch": 0.8172687224669604, "grad_norm": 1.7071036751428772, "learning_rate": 1.3762146661012471e-05, "loss": 0.705253541469574, "step": 2319 }, { "epoch": 0.8176211453744493, "grad_norm": 1.4969645559129943, "learning_rate": 1.3756747677458008e-05, "loss": 0.7800463438034058, "step": 2320 }, { "epoch": 0.8179735682819383, "grad_norm": 1.6172262621918039, "learning_rate": 1.3751347418604623e-05, "loss": 0.7615088224411011, "step": 2321 }, { "epoch": 0.8183259911894273, "grad_norm": 1.6932314886464006, "learning_rate": 1.3745945886285536e-05, "loss": 0.8004297614097595, "step": 2322 }, { "epoch": 0.8186784140969163, "grad_norm": 1.605867375121777, "learning_rate": 1.3740543082334399e-05, "loss": 0.6428912281990051, "step": 2323 }, { "epoch": 0.8190308370044053, "grad_norm": 1.4147620040703779, "learning_rate": 1.3735139008585294e-05, "loss": 0.6702802777290344, "step": 2324 }, { "epoch": 0.8193832599118943, "grad_norm": 1.3127203907182126, "learning_rate": 1.3729733666872736e-05, "loss": 0.6003440022468567, "step": 2325 }, { "epoch": 0.8197356828193832, "grad_norm": 2.04633486984075, "learning_rate": 1.3724327059031677e-05, "loss": 0.8264240622520447, "step": 2326 }, { "epoch": 0.8200881057268723, "grad_norm": 1.4037319277657845, "learning_rate": 1.3718919186897481e-05, "loss": 0.6974462866783142, "step": 2327 }, { "epoch": 0.8204405286343612, "grad_norm": 1.7081986923623933, "learning_rate": 1.3713510052305962e-05, "loss": 0.8273947238922119, "step": 2328 }, { "epoch": 0.8207929515418502, "grad_norm": 1.5000401588722418, "learning_rate": 1.3708099657093348e-05, "loss": 0.6230529546737671, "step": 2329 }, { "epoch": 0.8211453744493392, "grad_norm": 1.6377312790274685, "learning_rate": 1.37026880030963e-05, "loss": 0.6997084021568298, "step": 2330 }, { "epoch": 0.8214977973568282, "grad_norm": 1.582616740422673, "learning_rate": 1.3697275092151908e-05, "loss": 0.7212036848068237, "step": 2331 }, { "epoch": 0.8218502202643172, "grad_norm": 1.5449017822829925, "learning_rate": 1.3691860926097685e-05, "loss": 0.7758737206459045, "step": 2332 }, { "epoch": 0.8222026431718061, "grad_norm": 1.7784238395856364, "learning_rate": 1.368644550677157e-05, "loss": 0.62369704246521, "step": 2333 }, { "epoch": 0.8225550660792952, "grad_norm": 1.6110908974677367, "learning_rate": 1.3681028836011935e-05, "loss": 0.8051841855049133, "step": 2334 }, { "epoch": 0.8229074889867841, "grad_norm": 1.3626761635443752, "learning_rate": 1.3675610915657568e-05, "loss": 0.6087243556976318, "step": 2335 }, { "epoch": 0.8232599118942732, "grad_norm": 1.9382202981470131, "learning_rate": 1.3670191747547685e-05, "loss": 0.6949581503868103, "step": 2336 }, { "epoch": 0.8236123348017621, "grad_norm": 1.5451121537596906, "learning_rate": 1.3664771333521922e-05, "loss": 0.5621528029441833, "step": 2337 }, { "epoch": 0.8239647577092511, "grad_norm": 1.622327701652298, "learning_rate": 1.3659349675420346e-05, "loss": 0.8731498718261719, "step": 2338 }, { "epoch": 0.8243171806167401, "grad_norm": 1.5570249925953572, "learning_rate": 1.3653926775083437e-05, "loss": 0.6997240781784058, "step": 2339 }, { "epoch": 0.824669603524229, "grad_norm": 1.6562463291138314, "learning_rate": 1.3648502634352104e-05, "loss": 0.8061426877975464, "step": 2340 }, { "epoch": 0.8250220264317181, "grad_norm": 1.7061312576253802, "learning_rate": 1.3643077255067667e-05, "loss": 0.6186845302581787, "step": 2341 }, { "epoch": 0.825374449339207, "grad_norm": 1.6605971928200247, "learning_rate": 1.3637650639071884e-05, "loss": 0.8098937273025513, "step": 2342 }, { "epoch": 0.825726872246696, "grad_norm": 1.6091516027269386, "learning_rate": 1.3632222788206916e-05, "loss": 0.5810271501541138, "step": 2343 }, { "epoch": 0.826079295154185, "grad_norm": 1.4965459276387059, "learning_rate": 1.3626793704315348e-05, "loss": 0.48309600353240967, "step": 2344 }, { "epoch": 0.826431718061674, "grad_norm": 1.4326274242229946, "learning_rate": 1.3621363389240188e-05, "loss": 0.7366980314254761, "step": 2345 }, { "epoch": 0.826784140969163, "grad_norm": 1.571199172280502, "learning_rate": 1.3615931844824859e-05, "loss": 0.6572252511978149, "step": 2346 }, { "epoch": 0.827136563876652, "grad_norm": 1.3078300281358257, "learning_rate": 1.3610499072913204e-05, "loss": 0.6776653528213501, "step": 2347 }, { "epoch": 0.827488986784141, "grad_norm": 1.772641440888185, "learning_rate": 1.3605065075349473e-05, "loss": 0.6536053419113159, "step": 2348 }, { "epoch": 0.82784140969163, "grad_norm": 1.600184025362065, "learning_rate": 1.3599629853978342e-05, "loss": 0.7000117301940918, "step": 2349 }, { "epoch": 0.8281938325991189, "grad_norm": 1.5533713409132957, "learning_rate": 1.3594193410644902e-05, "loss": 0.6480045318603516, "step": 2350 }, { "epoch": 0.8285462555066079, "grad_norm": 1.5474076871693587, "learning_rate": 1.3588755747194656e-05, "loss": 0.6428179740905762, "step": 2351 }, { "epoch": 0.8288986784140969, "grad_norm": 1.3886734182652174, "learning_rate": 1.3583316865473517e-05, "loss": 0.618633508682251, "step": 2352 }, { "epoch": 0.8292511013215859, "grad_norm": 1.5946423674864716, "learning_rate": 1.357787676732782e-05, "loss": 0.7289671897888184, "step": 2353 }, { "epoch": 0.8296035242290749, "grad_norm": 1.687058159970245, "learning_rate": 1.3572435454604307e-05, "loss": 0.6969538927078247, "step": 2354 }, { "epoch": 0.8299559471365638, "grad_norm": 1.565248379514886, "learning_rate": 1.3566992929150137e-05, "loss": 0.8490859270095825, "step": 2355 }, { "epoch": 0.8303083700440529, "grad_norm": 1.532906793366292, "learning_rate": 1.3561549192812877e-05, "loss": 0.6883271336555481, "step": 2356 }, { "epoch": 0.8306607929515418, "grad_norm": 1.3151000902691472, "learning_rate": 1.3556104247440504e-05, "loss": 0.68092280626297, "step": 2357 }, { "epoch": 0.8310132158590309, "grad_norm": 1.2591886658215548, "learning_rate": 1.3550658094881413e-05, "loss": 0.7077454924583435, "step": 2358 }, { "epoch": 0.8313656387665198, "grad_norm": 1.5452673483096302, "learning_rate": 1.3545210736984393e-05, "loss": 0.7364591360092163, "step": 2359 }, { "epoch": 0.8317180616740089, "grad_norm": 1.4999509926023873, "learning_rate": 1.3539762175598666e-05, "loss": 0.8047930002212524, "step": 2360 }, { "epoch": 0.8320704845814978, "grad_norm": 1.4862380654794773, "learning_rate": 1.3534312412573836e-05, "loss": 0.7717781066894531, "step": 2361 }, { "epoch": 0.8324229074889867, "grad_norm": 1.7032828917925678, "learning_rate": 1.3528861449759938e-05, "loss": 0.7228613495826721, "step": 2362 }, { "epoch": 0.8327753303964758, "grad_norm": 1.5752771060390574, "learning_rate": 1.3523409289007399e-05, "loss": 0.8025436401367188, "step": 2363 }, { "epoch": 0.8331277533039647, "grad_norm": 1.5214524176303228, "learning_rate": 1.3517955932167057e-05, "loss": 0.6653664112091064, "step": 2364 }, { "epoch": 0.8334801762114538, "grad_norm": 1.4409217046848606, "learning_rate": 1.3512501381090158e-05, "loss": 0.709527313709259, "step": 2365 }, { "epoch": 0.8338325991189427, "grad_norm": 1.4678807653581447, "learning_rate": 1.3507045637628355e-05, "loss": 0.7317520380020142, "step": 2366 }, { "epoch": 0.8341850220264317, "grad_norm": 1.4520344718636113, "learning_rate": 1.3501588703633703e-05, "loss": 0.734069287776947, "step": 2367 }, { "epoch": 0.8345374449339207, "grad_norm": 1.355050784601881, "learning_rate": 1.349613058095866e-05, "loss": 0.5950552225112915, "step": 2368 }, { "epoch": 0.8348898678414097, "grad_norm": 1.3916802158941735, "learning_rate": 1.3490671271456084e-05, "loss": 0.5958857536315918, "step": 2369 }, { "epoch": 0.8352422907488987, "grad_norm": 1.319860830071963, "learning_rate": 1.348521077697925e-05, "loss": 0.7094449996948242, "step": 2370 }, { "epoch": 0.8355947136563877, "grad_norm": 1.283824481194398, "learning_rate": 1.3479749099381818e-05, "loss": 0.6260385513305664, "step": 2371 }, { "epoch": 0.8359471365638766, "grad_norm": 1.3546760632082742, "learning_rate": 1.3474286240517862e-05, "loss": 0.65608811378479, "step": 2372 }, { "epoch": 0.8362995594713656, "grad_norm": 1.5902013950729095, "learning_rate": 1.346882220224185e-05, "loss": 0.6942586898803711, "step": 2373 }, { "epoch": 0.8366519823788546, "grad_norm": 1.5432700710308092, "learning_rate": 1.3463356986408653e-05, "loss": 0.6831374168395996, "step": 2374 }, { "epoch": 0.8370044052863436, "grad_norm": 1.2453712902306997, "learning_rate": 1.3457890594873546e-05, "loss": 0.6363790035247803, "step": 2375 }, { "epoch": 0.8373568281938326, "grad_norm": 1.4407831477600082, "learning_rate": 1.3452423029492194e-05, "loss": 0.698935866355896, "step": 2376 }, { "epoch": 0.8377092511013216, "grad_norm": 1.6516160077651472, "learning_rate": 1.3446954292120667e-05, "loss": 0.8569005727767944, "step": 2377 }, { "epoch": 0.8380616740088106, "grad_norm": 1.4963554673760426, "learning_rate": 1.3441484384615428e-05, "loss": 0.8461613655090332, "step": 2378 }, { "epoch": 0.8384140969162995, "grad_norm": 1.635336062215313, "learning_rate": 1.343601330883335e-05, "loss": 0.7481078505516052, "step": 2379 }, { "epoch": 0.8387665198237886, "grad_norm": 1.1164155853725835, "learning_rate": 1.343054106663168e-05, "loss": 0.5632544755935669, "step": 2380 }, { "epoch": 0.8391189427312775, "grad_norm": 1.2387886339726162, "learning_rate": 1.3425067659868084e-05, "loss": 0.528980016708374, "step": 2381 }, { "epoch": 0.8394713656387666, "grad_norm": 1.2987181937645196, "learning_rate": 1.341959309040061e-05, "loss": 0.5520849227905273, "step": 2382 }, { "epoch": 0.8398237885462555, "grad_norm": 1.1709661282123542, "learning_rate": 1.34141173600877e-05, "loss": 0.569744348526001, "step": 2383 }, { "epoch": 0.8401762114537445, "grad_norm": 1.1526596958180186, "learning_rate": 1.3408640470788202e-05, "loss": 0.595065712928772, "step": 2384 }, { "epoch": 0.8405286343612335, "grad_norm": 1.716530250506247, "learning_rate": 1.3403162424361342e-05, "loss": 0.6993277072906494, "step": 2385 }, { "epoch": 0.8408810572687224, "grad_norm": 1.467497517918387, "learning_rate": 1.3397683222666748e-05, "loss": 0.6183342933654785, "step": 2386 }, { "epoch": 0.8412334801762115, "grad_norm": 1.5660447986557493, "learning_rate": 1.339220286756444e-05, "loss": 0.7280797362327576, "step": 2387 }, { "epoch": 0.8415859030837004, "grad_norm": 1.5538390945999534, "learning_rate": 1.3386721360914829e-05, "loss": 0.7377837896347046, "step": 2388 }, { "epoch": 0.8419383259911895, "grad_norm": 1.3658202604001934, "learning_rate": 1.3381238704578718e-05, "loss": 0.7202758193016052, "step": 2389 }, { "epoch": 0.8422907488986784, "grad_norm": 1.4864419338323784, "learning_rate": 1.3375754900417291e-05, "loss": 0.5899994969367981, "step": 2390 }, { "epoch": 0.8426431718061674, "grad_norm": 1.6545749228929092, "learning_rate": 1.3370269950292133e-05, "loss": 0.8128558993339539, "step": 2391 }, { "epoch": 0.8429955947136564, "grad_norm": 1.4863580222240895, "learning_rate": 1.3364783856065213e-05, "loss": 0.8222962617874146, "step": 2392 }, { "epoch": 0.8433480176211454, "grad_norm": 1.5392010225603865, "learning_rate": 1.3359296619598894e-05, "loss": 0.7898896932601929, "step": 2393 }, { "epoch": 0.8437004405286344, "grad_norm": 1.59106154269148, "learning_rate": 1.3353808242755912e-05, "loss": 0.6596726179122925, "step": 2394 }, { "epoch": 0.8440528634361234, "grad_norm": 1.6652244607977948, "learning_rate": 1.3348318727399411e-05, "loss": 0.8073080778121948, "step": 2395 }, { "epoch": 0.8444052863436123, "grad_norm": 1.582055504815832, "learning_rate": 1.3342828075392902e-05, "loss": 0.6640043258666992, "step": 2396 }, { "epoch": 0.8447577092511013, "grad_norm": 1.415789065826391, "learning_rate": 1.3337336288600297e-05, "loss": 0.6067632436752319, "step": 2397 }, { "epoch": 0.8451101321585903, "grad_norm": 1.308177796408265, "learning_rate": 1.3331843368885882e-05, "loss": 0.6891398429870605, "step": 2398 }, { "epoch": 0.8454625550660793, "grad_norm": 1.276250238749864, "learning_rate": 1.3326349318114335e-05, "loss": 0.6007423996925354, "step": 2399 }, { "epoch": 0.8458149779735683, "grad_norm": 1.6159836309404996, "learning_rate": 1.3320854138150712e-05, "loss": 0.7314017415046692, "step": 2400 }, { "epoch": 0.8461674008810572, "grad_norm": 1.5060027308979995, "learning_rate": 1.3315357830860461e-05, "loss": 0.7352335453033447, "step": 2401 }, { "epoch": 0.8465198237885463, "grad_norm": 1.3629774951204896, "learning_rate": 1.3309860398109402e-05, "loss": 0.6546785831451416, "step": 2402 }, { "epoch": 0.8468722466960352, "grad_norm": 1.4629106252693242, "learning_rate": 1.3304361841763746e-05, "loss": 0.590252697467804, "step": 2403 }, { "epoch": 0.8472246696035243, "grad_norm": 1.5501476697602834, "learning_rate": 1.3298862163690078e-05, "loss": 0.6864089369773865, "step": 2404 }, { "epoch": 0.8475770925110132, "grad_norm": 1.452376737172979, "learning_rate": 1.3293361365755373e-05, "loss": 0.7818390130996704, "step": 2405 }, { "epoch": 0.8479295154185023, "grad_norm": 1.9084475381981967, "learning_rate": 1.3287859449826977e-05, "loss": 0.7461166381835938, "step": 2406 }, { "epoch": 0.8482819383259912, "grad_norm": 1.7337796671611372, "learning_rate": 1.3282356417772618e-05, "loss": 0.7519750595092773, "step": 2407 }, { "epoch": 0.8486343612334801, "grad_norm": 1.445619912428175, "learning_rate": 1.3276852271460406e-05, "loss": 0.7041791081428528, "step": 2408 }, { "epoch": 0.8489867841409692, "grad_norm": 1.3131157575910486, "learning_rate": 1.327134701275883e-05, "loss": 0.5649428367614746, "step": 2409 }, { "epoch": 0.8493392070484581, "grad_norm": 1.838398891045019, "learning_rate": 1.3265840643536746e-05, "loss": 0.6607545614242554, "step": 2410 }, { "epoch": 0.8496916299559472, "grad_norm": 1.590568626194504, "learning_rate": 1.3260333165663406e-05, "loss": 0.7393547892570496, "step": 2411 }, { "epoch": 0.8500440528634361, "grad_norm": 1.660269046740627, "learning_rate": 1.325482458100842e-05, "loss": 0.6550742387771606, "step": 2412 }, { "epoch": 0.8503964757709251, "grad_norm": 1.3409806360783354, "learning_rate": 1.324931489144178e-05, "loss": 0.5104576349258423, "step": 2413 }, { "epoch": 0.8507488986784141, "grad_norm": 1.7056036938051933, "learning_rate": 1.3243804098833859e-05, "loss": 0.7679733037948608, "step": 2414 }, { "epoch": 0.8511013215859031, "grad_norm": 1.3058704920771766, "learning_rate": 1.3238292205055397e-05, "loss": 0.6516377925872803, "step": 2415 }, { "epoch": 0.8514537444933921, "grad_norm": 1.4749751578789572, "learning_rate": 1.3232779211977509e-05, "loss": 0.8509281277656555, "step": 2416 }, { "epoch": 0.8518061674008811, "grad_norm": 1.6532741255389543, "learning_rate": 1.3227265121471691e-05, "loss": 0.5643317103385925, "step": 2417 }, { "epoch": 0.85215859030837, "grad_norm": 1.4681710603298503, "learning_rate": 1.3221749935409798e-05, "loss": 0.5294302105903625, "step": 2418 }, { "epoch": 0.852511013215859, "grad_norm": 1.4914498870655002, "learning_rate": 1.3216233655664067e-05, "loss": 0.6301594972610474, "step": 2419 }, { "epoch": 0.852863436123348, "grad_norm": 1.399957922496421, "learning_rate": 1.32107162841071e-05, "loss": 0.6930294036865234, "step": 2420 }, { "epoch": 0.853215859030837, "grad_norm": 1.4069779391578274, "learning_rate": 1.3205197822611876e-05, "loss": 0.6266883611679077, "step": 2421 }, { "epoch": 0.853568281938326, "grad_norm": 1.7817063662748283, "learning_rate": 1.3199678273051743e-05, "loss": 0.7789868116378784, "step": 2422 }, { "epoch": 0.853920704845815, "grad_norm": 1.3387299141459739, "learning_rate": 1.3194157637300416e-05, "loss": 0.7148274779319763, "step": 2423 }, { "epoch": 0.854273127753304, "grad_norm": 1.4757263125304436, "learning_rate": 1.3188635917231972e-05, "loss": 0.550403356552124, "step": 2424 }, { "epoch": 0.8546255506607929, "grad_norm": 1.563076871593329, "learning_rate": 1.3183113114720872e-05, "loss": 0.6650338768959045, "step": 2425 }, { "epoch": 0.854977973568282, "grad_norm": 1.569123753374588, "learning_rate": 1.317758923164193e-05, "loss": 0.7774436473846436, "step": 2426 }, { "epoch": 0.8553303964757709, "grad_norm": 1.407079429107656, "learning_rate": 1.3172064269870335e-05, "loss": 0.6192025542259216, "step": 2427 }, { "epoch": 0.85568281938326, "grad_norm": 1.6230407627498116, "learning_rate": 1.3166538231281635e-05, "loss": 0.6758309602737427, "step": 2428 }, { "epoch": 0.8560352422907489, "grad_norm": 1.6026256588862147, "learning_rate": 1.3161011117751756e-05, "loss": 0.7311116456985474, "step": 2429 }, { "epoch": 0.8563876651982378, "grad_norm": 1.797024553793142, "learning_rate": 1.3155482931156977e-05, "loss": 0.7525666952133179, "step": 2430 }, { "epoch": 0.8567400881057269, "grad_norm": 1.7067244433524313, "learning_rate": 1.3149953673373945e-05, "loss": 0.6903671026229858, "step": 2431 }, { "epoch": 0.8570925110132158, "grad_norm": 1.2833360218942749, "learning_rate": 1.314442334627967e-05, "loss": 0.6036638021469116, "step": 2432 }, { "epoch": 0.8574449339207049, "grad_norm": 1.6354054518430503, "learning_rate": 1.3138891951751526e-05, "loss": 0.6490209698677063, "step": 2433 }, { "epoch": 0.8577973568281938, "grad_norm": 1.6970156912379664, "learning_rate": 1.3133359491667252e-05, "loss": 0.692024290561676, "step": 2434 }, { "epoch": 0.8581497797356828, "grad_norm": 1.4031255607051936, "learning_rate": 1.3127825967904944e-05, "loss": 0.6977943181991577, "step": 2435 }, { "epoch": 0.8585022026431718, "grad_norm": 1.3842045822286646, "learning_rate": 1.312229138234306e-05, "loss": 0.625649094581604, "step": 2436 }, { "epoch": 0.8588546255506608, "grad_norm": 1.5910466082409926, "learning_rate": 1.3116755736860422e-05, "loss": 0.671939492225647, "step": 2437 }, { "epoch": 0.8592070484581498, "grad_norm": 1.3856883940296008, "learning_rate": 1.3111219033336211e-05, "loss": 0.700029194355011, "step": 2438 }, { "epoch": 0.8595594713656388, "grad_norm": 1.3907118477619378, "learning_rate": 1.3105681273649959e-05, "loss": 0.6339718699455261, "step": 2439 }, { "epoch": 0.8599118942731278, "grad_norm": 1.306943148235595, "learning_rate": 1.3100142459681569e-05, "loss": 0.7105488777160645, "step": 2440 }, { "epoch": 0.8602643171806167, "grad_norm": 1.4503861250177865, "learning_rate": 1.3094602593311294e-05, "loss": 0.616797924041748, "step": 2441 }, { "epoch": 0.8606167400881057, "grad_norm": 1.5110286813274958, "learning_rate": 1.3089061676419746e-05, "loss": 0.7167524099349976, "step": 2442 }, { "epoch": 0.8609691629955947, "grad_norm": 1.5215961993133658, "learning_rate": 1.3083519710887895e-05, "loss": 0.5499090552330017, "step": 2443 }, { "epoch": 0.8613215859030837, "grad_norm": 1.4623789546240658, "learning_rate": 1.3077976698597064e-05, "loss": 0.5764151811599731, "step": 2444 }, { "epoch": 0.8616740088105727, "grad_norm": 1.438510619597336, "learning_rate": 1.3072432641428931e-05, "loss": 0.7171419858932495, "step": 2445 }, { "epoch": 0.8620264317180617, "grad_norm": 1.3023250448197168, "learning_rate": 1.3066887541265539e-05, "loss": 0.7546026706695557, "step": 2446 }, { "epoch": 0.8623788546255506, "grad_norm": 1.2250371592811133, "learning_rate": 1.306134139998927e-05, "loss": 0.5884296298027039, "step": 2447 }, { "epoch": 0.8627312775330397, "grad_norm": 1.3135127283076564, "learning_rate": 1.3055794219482867e-05, "loss": 0.6877926588058472, "step": 2448 }, { "epoch": 0.8630837004405286, "grad_norm": 1.5935068741769265, "learning_rate": 1.3050246001629425e-05, "loss": 0.598037838935852, "step": 2449 }, { "epoch": 0.8634361233480177, "grad_norm": 1.4128431939298278, "learning_rate": 1.3044696748312395e-05, "loss": 0.6560795307159424, "step": 2450 }, { "epoch": 0.8637885462555066, "grad_norm": 1.5856094022002207, "learning_rate": 1.3039146461415575e-05, "loss": 0.7130829691886902, "step": 2451 }, { "epoch": 0.8641409691629955, "grad_norm": 1.9167144031452974, "learning_rate": 1.303359514282311e-05, "loss": 0.7402251958847046, "step": 2452 }, { "epoch": 0.8644933920704846, "grad_norm": 1.4143817039312587, "learning_rate": 1.3028042794419502e-05, "loss": 0.6610683798789978, "step": 2453 }, { "epoch": 0.8648458149779735, "grad_norm": 1.6544654323663863, "learning_rate": 1.3022489418089606e-05, "loss": 0.84892737865448, "step": 2454 }, { "epoch": 0.8651982378854626, "grad_norm": 1.689285386487206, "learning_rate": 1.3016935015718612e-05, "loss": 0.7285948991775513, "step": 2455 }, { "epoch": 0.8655506607929515, "grad_norm": 1.46262615014944, "learning_rate": 1.3011379589192074e-05, "loss": 0.6800004839897156, "step": 2456 }, { "epoch": 0.8659030837004406, "grad_norm": 1.492659523558787, "learning_rate": 1.3005823140395878e-05, "loss": 0.618618369102478, "step": 2457 }, { "epoch": 0.8662555066079295, "grad_norm": 1.8084387802865425, "learning_rate": 1.3000265671216278e-05, "loss": 0.7657757997512817, "step": 2458 }, { "epoch": 0.8666079295154185, "grad_norm": 1.5490708834885107, "learning_rate": 1.2994707183539848e-05, "loss": 0.7814151644706726, "step": 2459 }, { "epoch": 0.8669603524229075, "grad_norm": 1.2899412950022648, "learning_rate": 1.2989147679253531e-05, "loss": 0.6494930982589722, "step": 2460 }, { "epoch": 0.8673127753303965, "grad_norm": 1.5543724658760723, "learning_rate": 1.2983587160244602e-05, "loss": 0.6498425006866455, "step": 2461 }, { "epoch": 0.8676651982378855, "grad_norm": 1.5210228165977844, "learning_rate": 1.2978025628400684e-05, "loss": 0.635313093662262, "step": 2462 }, { "epoch": 0.8680176211453744, "grad_norm": 1.500755936886382, "learning_rate": 1.2972463085609744e-05, "loss": 0.6892971992492676, "step": 2463 }, { "epoch": 0.8683700440528634, "grad_norm": 1.3872566957567176, "learning_rate": 1.2966899533760095e-05, "loss": 0.691922128200531, "step": 2464 }, { "epoch": 0.8687224669603524, "grad_norm": 1.773327696286038, "learning_rate": 1.2961334974740386e-05, "loss": 0.5764378309249878, "step": 2465 }, { "epoch": 0.8690748898678414, "grad_norm": 1.6231464224655543, "learning_rate": 1.2955769410439616e-05, "loss": 0.8193005919456482, "step": 2466 }, { "epoch": 0.8694273127753304, "grad_norm": 1.4243504226778951, "learning_rate": 1.2950202842747115e-05, "loss": 0.6141501665115356, "step": 2467 }, { "epoch": 0.8697797356828194, "grad_norm": 1.5061592811010869, "learning_rate": 1.2944635273552565e-05, "loss": 0.7464454174041748, "step": 2468 }, { "epoch": 0.8701321585903083, "grad_norm": 1.3349759192393535, "learning_rate": 1.293906670474598e-05, "loss": 0.5970025062561035, "step": 2469 }, { "epoch": 0.8704845814977974, "grad_norm": 1.6022434524431073, "learning_rate": 1.2933497138217714e-05, "loss": 0.7247673273086548, "step": 2470 }, { "epoch": 0.8708370044052863, "grad_norm": 1.535051650641408, "learning_rate": 1.2927926575858463e-05, "loss": 0.746272087097168, "step": 2471 }, { "epoch": 0.8711894273127754, "grad_norm": 1.5072596947359789, "learning_rate": 1.2922355019559265e-05, "loss": 0.6918776035308838, "step": 2472 }, { "epoch": 0.8715418502202643, "grad_norm": 1.553343209452483, "learning_rate": 1.2916782471211478e-05, "loss": 0.6056039929389954, "step": 2473 }, { "epoch": 0.8718942731277532, "grad_norm": 1.3670048649799473, "learning_rate": 1.2911208932706821e-05, "loss": 0.6699481010437012, "step": 2474 }, { "epoch": 0.8722466960352423, "grad_norm": 1.4719810242076543, "learning_rate": 1.2905634405937327e-05, "loss": 0.5141814947128296, "step": 2475 }, { "epoch": 0.8725991189427312, "grad_norm": 1.5819338229003952, "learning_rate": 1.2900058892795383e-05, "loss": 0.7521284818649292, "step": 2476 }, { "epoch": 0.8729515418502203, "grad_norm": 2.2082732494247916, "learning_rate": 1.2894482395173695e-05, "loss": 0.6878937482833862, "step": 2477 }, { "epoch": 0.8733039647577092, "grad_norm": 1.3942904192465777, "learning_rate": 1.2888904914965317e-05, "loss": 0.5963379144668579, "step": 2478 }, { "epoch": 0.8736563876651983, "grad_norm": 1.7634340153188761, "learning_rate": 1.2883326454063623e-05, "loss": 0.7572320103645325, "step": 2479 }, { "epoch": 0.8740088105726872, "grad_norm": 1.399026210420982, "learning_rate": 1.2877747014362334e-05, "loss": 0.7047982215881348, "step": 2480 }, { "epoch": 0.8743612334801762, "grad_norm": 2.0588397887454715, "learning_rate": 1.2872166597755488e-05, "loss": 0.6449024677276611, "step": 2481 }, { "epoch": 0.8747136563876652, "grad_norm": 1.6446468607591163, "learning_rate": 1.2866585206137469e-05, "loss": 0.7590922117233276, "step": 2482 }, { "epoch": 0.8750660792951542, "grad_norm": 1.6164965426300901, "learning_rate": 1.2861002841402983e-05, "loss": 0.7534210085868835, "step": 2483 }, { "epoch": 0.8754185022026432, "grad_norm": 1.9198456186069754, "learning_rate": 1.2855419505447073e-05, "loss": 0.7091225385665894, "step": 2484 }, { "epoch": 0.8757709251101321, "grad_norm": 1.5347710098555305, "learning_rate": 1.2849835200165104e-05, "loss": 0.7578933835029602, "step": 2485 }, { "epoch": 0.8761233480176212, "grad_norm": 1.3282869408675961, "learning_rate": 1.2844249927452771e-05, "loss": 0.5938349962234497, "step": 2486 }, { "epoch": 0.8764757709251101, "grad_norm": 1.5090052513716286, "learning_rate": 1.2838663689206108e-05, "loss": 0.5726315379142761, "step": 2487 }, { "epoch": 0.8768281938325991, "grad_norm": 1.450396836473225, "learning_rate": 1.2833076487321465e-05, "loss": 0.8181554079055786, "step": 2488 }, { "epoch": 0.8771806167400881, "grad_norm": 1.71919397348368, "learning_rate": 1.2827488323695522e-05, "loss": 0.7465275526046753, "step": 2489 }, { "epoch": 0.8775330396475771, "grad_norm": 1.2623461784182488, "learning_rate": 1.2821899200225288e-05, "loss": 0.6083456873893738, "step": 2490 }, { "epoch": 0.877885462555066, "grad_norm": 1.4922167619772364, "learning_rate": 1.2816309118808095e-05, "loss": 0.6393307447433472, "step": 2491 }, { "epoch": 0.8782378854625551, "grad_norm": 1.3846980777960398, "learning_rate": 1.2810718081341604e-05, "loss": 0.6562504768371582, "step": 2492 }, { "epoch": 0.878590308370044, "grad_norm": 1.5590691123255283, "learning_rate": 1.2805126089723798e-05, "loss": 0.6737300753593445, "step": 2493 }, { "epoch": 0.8789427312775331, "grad_norm": 1.7724399876158112, "learning_rate": 1.2799533145852982e-05, "loss": 0.6246815919876099, "step": 2494 }, { "epoch": 0.879295154185022, "grad_norm": 1.7718655540042538, "learning_rate": 1.2793939251627788e-05, "loss": 0.7499577403068542, "step": 2495 }, { "epoch": 0.8796475770925111, "grad_norm": 1.6628095797742937, "learning_rate": 1.2788344408947171e-05, "loss": 0.7645655870437622, "step": 2496 }, { "epoch": 0.88, "grad_norm": 1.732888201165417, "learning_rate": 1.27827486197104e-05, "loss": 0.7407524585723877, "step": 2497 }, { "epoch": 0.8803524229074889, "grad_norm": 1.590151572985607, "learning_rate": 1.2777151885817078e-05, "loss": 0.6401108503341675, "step": 2498 }, { "epoch": 0.880704845814978, "grad_norm": 1.5984459598023502, "learning_rate": 1.2771554209167116e-05, "loss": 0.8332269191741943, "step": 2499 }, { "epoch": 0.8810572687224669, "grad_norm": 1.61859187638703, "learning_rate": 1.2765955591660757e-05, "loss": 0.7677830457687378, "step": 2500 }, { "epoch": 0.881409691629956, "grad_norm": 1.4420535275594295, "learning_rate": 1.2760356035198553e-05, "loss": 0.8532943725585938, "step": 2501 }, { "epoch": 0.8817621145374449, "grad_norm": 1.3662949943021319, "learning_rate": 1.2754755541681384e-05, "loss": 0.6287009716033936, "step": 2502 }, { "epoch": 0.882114537444934, "grad_norm": 1.38981570117233, "learning_rate": 1.2749154113010432e-05, "loss": 0.7039133310317993, "step": 2503 }, { "epoch": 0.8824669603524229, "grad_norm": 1.6518390089780828, "learning_rate": 1.2743551751087222e-05, "loss": 0.6959357857704163, "step": 2504 }, { "epoch": 0.8828193832599119, "grad_norm": 1.3554006828606007, "learning_rate": 1.2737948457813571e-05, "loss": 0.6862938404083252, "step": 2505 }, { "epoch": 0.8831718061674009, "grad_norm": 1.6773466383223146, "learning_rate": 1.273234423509163e-05, "loss": 0.6903352737426758, "step": 2506 }, { "epoch": 0.8835242290748899, "grad_norm": 1.374322606051121, "learning_rate": 1.2726739084823851e-05, "loss": 0.7226145267486572, "step": 2507 }, { "epoch": 0.8838766519823789, "grad_norm": 1.4091144718113782, "learning_rate": 1.2721133008913015e-05, "loss": 0.7865043878555298, "step": 2508 }, { "epoch": 0.8842290748898678, "grad_norm": 1.4501170174913356, "learning_rate": 1.2715526009262209e-05, "loss": 0.6594572067260742, "step": 2509 }, { "epoch": 0.8845814977973568, "grad_norm": 1.3500042347590218, "learning_rate": 1.270991808777483e-05, "loss": 0.5967481136322021, "step": 2510 }, { "epoch": 0.8849339207048458, "grad_norm": 1.3600104271689806, "learning_rate": 1.2704309246354599e-05, "loss": 0.7843632698059082, "step": 2511 }, { "epoch": 0.8852863436123348, "grad_norm": 1.3543191802484777, "learning_rate": 1.2698699486905538e-05, "loss": 0.7475506067276001, "step": 2512 }, { "epoch": 0.8856387665198238, "grad_norm": 1.4881501151953718, "learning_rate": 1.2693088811331987e-05, "loss": 0.8082534670829773, "step": 2513 }, { "epoch": 0.8859911894273128, "grad_norm": 1.6899694353159702, "learning_rate": 1.2687477221538598e-05, "loss": 0.7421785593032837, "step": 2514 }, { "epoch": 0.8863436123348017, "grad_norm": 1.295151070825849, "learning_rate": 1.2681864719430328e-05, "loss": 0.6268718242645264, "step": 2515 }, { "epoch": 0.8866960352422908, "grad_norm": 1.595396389533138, "learning_rate": 1.2676251306912448e-05, "loss": 0.7285459041595459, "step": 2516 }, { "epoch": 0.8870484581497797, "grad_norm": 1.4826705601530517, "learning_rate": 1.2670636985890542e-05, "loss": 0.6132184267044067, "step": 2517 }, { "epoch": 0.8874008810572688, "grad_norm": 1.4018565352445778, "learning_rate": 1.2665021758270488e-05, "loss": 0.5550754070281982, "step": 2518 }, { "epoch": 0.8877533039647577, "grad_norm": 1.3628132273232696, "learning_rate": 1.2659405625958488e-05, "loss": 0.5357390642166138, "step": 2519 }, { "epoch": 0.8881057268722466, "grad_norm": 1.4153066703364516, "learning_rate": 1.2653788590861039e-05, "loss": 0.5858328342437744, "step": 2520 }, { "epoch": 0.8884581497797357, "grad_norm": 1.731815068535558, "learning_rate": 1.2648170654884955e-05, "loss": 0.7109283208847046, "step": 2521 }, { "epoch": 0.8888105726872246, "grad_norm": 1.9753429482306435, "learning_rate": 1.2642551819937348e-05, "loss": 0.808137834072113, "step": 2522 }, { "epoch": 0.8891629955947137, "grad_norm": 1.6385693606484741, "learning_rate": 1.2636932087925637e-05, "loss": 0.587998628616333, "step": 2523 }, { "epoch": 0.8895154185022026, "grad_norm": 1.4234526769499198, "learning_rate": 1.2631311460757545e-05, "loss": 0.5555537343025208, "step": 2524 }, { "epoch": 0.8898678414096917, "grad_norm": 1.4118650122814267, "learning_rate": 1.2625689940341102e-05, "loss": 0.641632080078125, "step": 2525 }, { "epoch": 0.8902202643171806, "grad_norm": 1.5401015682174186, "learning_rate": 1.262006752858464e-05, "loss": 0.7005184888839722, "step": 2526 }, { "epoch": 0.8905726872246696, "grad_norm": 1.272518513643159, "learning_rate": 1.2614444227396792e-05, "loss": 0.6907261610031128, "step": 2527 }, { "epoch": 0.8909251101321586, "grad_norm": 1.4162379009723582, "learning_rate": 1.2608820038686492e-05, "loss": 0.5757718086242676, "step": 2528 }, { "epoch": 0.8912775330396476, "grad_norm": 1.888252337049927, "learning_rate": 1.2603194964362979e-05, "loss": 0.6462569832801819, "step": 2529 }, { "epoch": 0.8916299559471366, "grad_norm": 2.6509089623338586, "learning_rate": 1.2597569006335787e-05, "loss": 0.7028999328613281, "step": 2530 }, { "epoch": 0.8919823788546255, "grad_norm": 1.3325876541370223, "learning_rate": 1.2591942166514763e-05, "loss": 0.5789325833320618, "step": 2531 }, { "epoch": 0.8923348017621145, "grad_norm": 1.5373223041612576, "learning_rate": 1.258631444681003e-05, "loss": 0.6545255184173584, "step": 2532 }, { "epoch": 0.8926872246696035, "grad_norm": 1.560686991488605, "learning_rate": 1.258068584913204e-05, "loss": 0.7227469682693481, "step": 2533 }, { "epoch": 0.8930396475770925, "grad_norm": 1.3545909427052794, "learning_rate": 1.2575056375391513e-05, "loss": 0.5985771417617798, "step": 2534 }, { "epoch": 0.8933920704845815, "grad_norm": 1.5422643503857134, "learning_rate": 1.2569426027499485e-05, "loss": 0.6705960035324097, "step": 2535 }, { "epoch": 0.8937444933920705, "grad_norm": 1.5427105799340322, "learning_rate": 1.2563794807367284e-05, "loss": 0.6662027835845947, "step": 2536 }, { "epoch": 0.8940969162995595, "grad_norm": 1.5270286613671318, "learning_rate": 1.2558162716906537e-05, "loss": 0.7742453813552856, "step": 2537 }, { "epoch": 0.8944493392070485, "grad_norm": 1.628032718158035, "learning_rate": 1.255252975802916e-05, "loss": 0.6124528050422668, "step": 2538 }, { "epoch": 0.8948017621145374, "grad_norm": 1.455711423520218, "learning_rate": 1.2546895932647365e-05, "loss": 0.5728615522384644, "step": 2539 }, { "epoch": 0.8951541850220265, "grad_norm": 1.5737389396802581, "learning_rate": 1.2541261242673665e-05, "loss": 0.6347167491912842, "step": 2540 }, { "epoch": 0.8955066079295154, "grad_norm": 1.62324317727844, "learning_rate": 1.2535625690020861e-05, "loss": 0.6350656747817993, "step": 2541 }, { "epoch": 0.8958590308370044, "grad_norm": 1.674339310689998, "learning_rate": 1.2529989276602043e-05, "loss": 0.7538303732872009, "step": 2542 }, { "epoch": 0.8962114537444934, "grad_norm": 1.5900983527544528, "learning_rate": 1.2524352004330607e-05, "loss": 0.8154318928718567, "step": 2543 }, { "epoch": 0.8965638766519823, "grad_norm": 1.4033932104877718, "learning_rate": 1.2518713875120222e-05, "loss": 0.5313037633895874, "step": 2544 }, { "epoch": 0.8969162995594714, "grad_norm": 1.3069539051845793, "learning_rate": 1.2513074890884864e-05, "loss": 0.740921139717102, "step": 2545 }, { "epoch": 0.8972687224669603, "grad_norm": 1.593785966579892, "learning_rate": 1.250743505353879e-05, "loss": 0.6079888343811035, "step": 2546 }, { "epoch": 0.8976211453744494, "grad_norm": 1.266024042192646, "learning_rate": 1.2501794364996553e-05, "loss": 0.46736663579940796, "step": 2547 }, { "epoch": 0.8979735682819383, "grad_norm": 1.5066472302506413, "learning_rate": 1.2496152827172982e-05, "loss": 0.5670880079269409, "step": 2548 }, { "epoch": 0.8983259911894274, "grad_norm": 1.4991563073413907, "learning_rate": 1.2490510441983212e-05, "loss": 0.7845931649208069, "step": 2549 }, { "epoch": 0.8986784140969163, "grad_norm": 1.5458127280177445, "learning_rate": 1.2484867211342653e-05, "loss": 0.5625143647193909, "step": 2550 }, { "epoch": 0.8990308370044053, "grad_norm": 1.5409896244330605, "learning_rate": 1.2479223137167011e-05, "loss": 0.6631217002868652, "step": 2551 }, { "epoch": 0.8993832599118943, "grad_norm": 1.6071757454969378, "learning_rate": 1.247357822137227e-05, "loss": 0.6588548421859741, "step": 2552 }, { "epoch": 0.8997356828193832, "grad_norm": 1.4192601474848106, "learning_rate": 1.24679324658747e-05, "loss": 0.8046029806137085, "step": 2553 }, { "epoch": 0.9000881057268723, "grad_norm": 1.6272051463241026, "learning_rate": 1.2462285872590862e-05, "loss": 0.6651894450187683, "step": 2554 }, { "epoch": 0.9004405286343612, "grad_norm": 1.5179002680249722, "learning_rate": 1.2456638443437605e-05, "loss": 0.5888474583625793, "step": 2555 }, { "epoch": 0.9007929515418502, "grad_norm": 1.7319345866859506, "learning_rate": 1.2450990180332045e-05, "loss": 0.5915735363960266, "step": 2556 }, { "epoch": 0.9011453744493392, "grad_norm": 1.5409991319630119, "learning_rate": 1.24453410851916e-05, "loss": 0.6830431222915649, "step": 2557 }, { "epoch": 0.9014977973568282, "grad_norm": 1.3954767744454935, "learning_rate": 1.2439691159933955e-05, "loss": 0.6812379956245422, "step": 2558 }, { "epoch": 0.9018502202643172, "grad_norm": 1.3481753587360845, "learning_rate": 1.2434040406477092e-05, "loss": 0.6887152791023254, "step": 2559 }, { "epoch": 0.9022026431718062, "grad_norm": 1.495436388275929, "learning_rate": 1.2428388826739254e-05, "loss": 0.677071213722229, "step": 2560 }, { "epoch": 0.9025550660792951, "grad_norm": 1.5809198519920526, "learning_rate": 1.242273642263899e-05, "loss": 0.6635652780532837, "step": 2561 }, { "epoch": 0.9029074889867842, "grad_norm": 1.7455357614962055, "learning_rate": 1.2417083196095105e-05, "loss": 0.7543712854385376, "step": 2562 }, { "epoch": 0.9032599118942731, "grad_norm": 1.743758273604275, "learning_rate": 1.2411429149026701e-05, "loss": 0.6219073534011841, "step": 2563 }, { "epoch": 0.9036123348017621, "grad_norm": 1.360518097358955, "learning_rate": 1.2405774283353144e-05, "loss": 0.6576533317565918, "step": 2564 }, { "epoch": 0.9039647577092511, "grad_norm": 1.3683846685040542, "learning_rate": 1.240011860099409e-05, "loss": 0.6458585262298584, "step": 2565 }, { "epoch": 0.90431718061674, "grad_norm": 1.5753618523282886, "learning_rate": 1.2394462103869464e-05, "loss": 0.6943198442459106, "step": 2566 }, { "epoch": 0.9046696035242291, "grad_norm": 1.5425443594991994, "learning_rate": 1.2388804793899473e-05, "loss": 0.6684235334396362, "step": 2567 }, { "epoch": 0.905022026431718, "grad_norm": 1.4432793187881665, "learning_rate": 1.2383146673004598e-05, "loss": 0.6707017421722412, "step": 2568 }, { "epoch": 0.9053744493392071, "grad_norm": 1.4610510830510222, "learning_rate": 1.2377487743105593e-05, "loss": 0.6009544134140015, "step": 2569 }, { "epoch": 0.905726872246696, "grad_norm": 1.3343070463381261, "learning_rate": 1.2371828006123488e-05, "loss": 0.57770836353302, "step": 2570 }, { "epoch": 0.9060792951541851, "grad_norm": 1.50423514822828, "learning_rate": 1.236616746397959e-05, "loss": 0.6146866083145142, "step": 2571 }, { "epoch": 0.906431718061674, "grad_norm": 1.4060902038910876, "learning_rate": 1.2360506118595476e-05, "loss": 0.6374951601028442, "step": 2572 }, { "epoch": 0.906784140969163, "grad_norm": 1.5006132241656203, "learning_rate": 1.2354843971892998e-05, "loss": 0.6933800578117371, "step": 2573 }, { "epoch": 0.907136563876652, "grad_norm": 1.6402374081466708, "learning_rate": 1.2349181025794278e-05, "loss": 0.857126772403717, "step": 2574 }, { "epoch": 0.9074889867841409, "grad_norm": 1.7970464713795387, "learning_rate": 1.2343517282221704e-05, "loss": 0.7316192388534546, "step": 2575 }, { "epoch": 0.90784140969163, "grad_norm": 1.7338748475900745, "learning_rate": 1.2337852743097947e-05, "loss": 0.7916824817657471, "step": 2576 }, { "epoch": 0.9081938325991189, "grad_norm": 1.342845056559204, "learning_rate": 1.2332187410345941e-05, "loss": 0.6437021493911743, "step": 2577 }, { "epoch": 0.908546255506608, "grad_norm": 1.547322536503476, "learning_rate": 1.2326521285888892e-05, "loss": 0.8788109421730042, "step": 2578 }, { "epoch": 0.9088986784140969, "grad_norm": 1.4382005842040866, "learning_rate": 1.2320854371650268e-05, "loss": 0.704395055770874, "step": 2579 }, { "epoch": 0.9092511013215859, "grad_norm": 1.410037340911335, "learning_rate": 1.2315186669553814e-05, "loss": 0.6579844951629639, "step": 2580 }, { "epoch": 0.9096035242290749, "grad_norm": 1.3089054036910626, "learning_rate": 1.2309518181523537e-05, "loss": 0.6329941749572754, "step": 2581 }, { "epoch": 0.9099559471365639, "grad_norm": 1.746183595307062, "learning_rate": 1.2303848909483711e-05, "loss": 0.8868603706359863, "step": 2582 }, { "epoch": 0.9103083700440529, "grad_norm": 1.4531546458491524, "learning_rate": 1.2298178855358875e-05, "loss": 0.6402688026428223, "step": 2583 }, { "epoch": 0.9106607929515419, "grad_norm": 1.3289180353613772, "learning_rate": 1.2292508021073846e-05, "loss": 0.8017194271087646, "step": 2584 }, { "epoch": 0.9110132158590308, "grad_norm": 1.6340808373356166, "learning_rate": 1.2286836408553687e-05, "loss": 0.7396517992019653, "step": 2585 }, { "epoch": 0.9113656387665198, "grad_norm": 1.5443847526543046, "learning_rate": 1.2281164019723737e-05, "loss": 0.6123272776603699, "step": 2586 }, { "epoch": 0.9117180616740088, "grad_norm": 1.464544186162697, "learning_rate": 1.2275490856509591e-05, "loss": 0.7675807476043701, "step": 2587 }, { "epoch": 0.9120704845814978, "grad_norm": 1.67164115622116, "learning_rate": 1.2269816920837121e-05, "loss": 0.6814998388290405, "step": 2588 }, { "epoch": 0.9124229074889868, "grad_norm": 1.3228366401729674, "learning_rate": 1.2264142214632441e-05, "loss": 0.6290348768234253, "step": 2589 }, { "epoch": 0.9127753303964757, "grad_norm": 1.5676260945728981, "learning_rate": 1.2258466739821946e-05, "loss": 0.6752464175224304, "step": 2590 }, { "epoch": 0.9131277533039648, "grad_norm": 1.3388236473063337, "learning_rate": 1.2252790498332275e-05, "loss": 0.6153687238693237, "step": 2591 }, { "epoch": 0.9134801762114537, "grad_norm": 1.5346187118504635, "learning_rate": 1.2247113492090344e-05, "loss": 0.5952479839324951, "step": 2592 }, { "epoch": 0.9138325991189428, "grad_norm": 1.4457638395568853, "learning_rate": 1.2241435723023309e-05, "loss": 0.5457659959793091, "step": 2593 }, { "epoch": 0.9141850220264317, "grad_norm": 1.5389040689398128, "learning_rate": 1.2235757193058607e-05, "loss": 0.7373491525650024, "step": 2594 }, { "epoch": 0.9145374449339208, "grad_norm": 1.3149945847764668, "learning_rate": 1.2230077904123914e-05, "loss": 0.6564488410949707, "step": 2595 }, { "epoch": 0.9148898678414097, "grad_norm": 1.8716233271125673, "learning_rate": 1.2224397858147176e-05, "loss": 0.6790947914123535, "step": 2596 }, { "epoch": 0.9152422907488986, "grad_norm": 1.6467277287942856, "learning_rate": 1.2218717057056592e-05, "loss": 0.8304486274719238, "step": 2597 }, { "epoch": 0.9155947136563877, "grad_norm": 1.7018746535629268, "learning_rate": 1.2213035502780616e-05, "loss": 0.7452701330184937, "step": 2598 }, { "epoch": 0.9159471365638766, "grad_norm": 1.270448247487427, "learning_rate": 1.2207353197247957e-05, "loss": 0.572200357913971, "step": 2599 }, { "epoch": 0.9162995594713657, "grad_norm": 1.574291214704138, "learning_rate": 1.2201670142387587e-05, "loss": 0.7142342925071716, "step": 2600 }, { "epoch": 0.9166519823788546, "grad_norm": 1.367606009894927, "learning_rate": 1.219598634012872e-05, "loss": 0.9390528202056885, "step": 2601 }, { "epoch": 0.9170044052863436, "grad_norm": 1.6870829349403977, "learning_rate": 1.2190301792400832e-05, "loss": 0.6897540092468262, "step": 2602 }, { "epoch": 0.9173568281938326, "grad_norm": 1.5631074773710765, "learning_rate": 1.2184616501133649e-05, "loss": 0.7309582233428955, "step": 2603 }, { "epoch": 0.9177092511013216, "grad_norm": 1.4956685909345118, "learning_rate": 1.2178930468257154e-05, "loss": 0.7692370414733887, "step": 2604 }, { "epoch": 0.9180616740088106, "grad_norm": 1.6160577913139176, "learning_rate": 1.2173243695701575e-05, "loss": 0.7650456428527832, "step": 2605 }, { "epoch": 0.9184140969162996, "grad_norm": 1.4419682356133905, "learning_rate": 1.2167556185397396e-05, "loss": 0.6000699996948242, "step": 2606 }, { "epoch": 0.9187665198237885, "grad_norm": 1.368037173998054, "learning_rate": 1.2161867939275344e-05, "loss": 0.6227651834487915, "step": 2607 }, { "epoch": 0.9191189427312776, "grad_norm": 1.3507337866227296, "learning_rate": 1.2156178959266414e-05, "loss": 0.6554160118103027, "step": 2608 }, { "epoch": 0.9194713656387665, "grad_norm": 1.4986959017577084, "learning_rate": 1.2150489247301826e-05, "loss": 0.5360773801803589, "step": 2609 }, { "epoch": 0.9198237885462555, "grad_norm": 1.3546990782009203, "learning_rate": 1.2144798805313065e-05, "loss": 0.7184062004089355, "step": 2610 }, { "epoch": 0.9201762114537445, "grad_norm": 1.6293146255106934, "learning_rate": 1.2139107635231857e-05, "loss": 0.646910548210144, "step": 2611 }, { "epoch": 0.9205286343612334, "grad_norm": 1.449047238736513, "learning_rate": 1.2133415738990179e-05, "loss": 0.7794413566589355, "step": 2612 }, { "epoch": 0.9208810572687225, "grad_norm": 1.5173448374489182, "learning_rate": 1.2127723118520254e-05, "loss": 0.5904654860496521, "step": 2613 }, { "epoch": 0.9212334801762114, "grad_norm": 1.6062827687776695, "learning_rate": 1.2122029775754545e-05, "loss": 0.5526635646820068, "step": 2614 }, { "epoch": 0.9215859030837005, "grad_norm": 1.584080412995617, "learning_rate": 1.2116335712625766e-05, "loss": 0.6832528710365295, "step": 2615 }, { "epoch": 0.9219383259911894, "grad_norm": 1.5962919739796952, "learning_rate": 1.211064093106688e-05, "loss": 0.5858304500579834, "step": 2616 }, { "epoch": 0.9222907488986785, "grad_norm": 1.6542154949587857, "learning_rate": 1.2104945433011079e-05, "loss": 0.7383478879928589, "step": 2617 }, { "epoch": 0.9226431718061674, "grad_norm": 1.4197774198085091, "learning_rate": 1.2099249220391815e-05, "loss": 0.6466768980026245, "step": 2618 }, { "epoch": 0.9229955947136564, "grad_norm": 1.6780588288371647, "learning_rate": 1.209355229514277e-05, "loss": 0.5681238174438477, "step": 2619 }, { "epoch": 0.9233480176211454, "grad_norm": 1.4473210287022626, "learning_rate": 1.2087854659197874e-05, "loss": 0.5726606249809265, "step": 2620 }, { "epoch": 0.9237004405286343, "grad_norm": 1.5671254030487451, "learning_rate": 1.2082156314491298e-05, "loss": 0.6643342971801758, "step": 2621 }, { "epoch": 0.9240528634361234, "grad_norm": 1.6891696074210503, "learning_rate": 1.2076457262957454e-05, "loss": 0.5408967733383179, "step": 2622 }, { "epoch": 0.9244052863436123, "grad_norm": 1.503887173232949, "learning_rate": 1.207075750653099e-05, "loss": 0.706169843673706, "step": 2623 }, { "epoch": 0.9247577092511013, "grad_norm": 1.7934999117659478, "learning_rate": 1.2065057047146797e-05, "loss": 0.7973969578742981, "step": 2624 }, { "epoch": 0.9251101321585903, "grad_norm": 1.4120942109312036, "learning_rate": 1.2059355886740002e-05, "loss": 0.6907010674476624, "step": 2625 }, { "epoch": 0.9254625550660793, "grad_norm": 1.8378017160561377, "learning_rate": 1.2053654027245977e-05, "loss": 0.8174253702163696, "step": 2626 }, { "epoch": 0.9258149779735683, "grad_norm": 1.8873519737119473, "learning_rate": 1.204795147060032e-05, "loss": 0.60319983959198, "step": 2627 }, { "epoch": 0.9261674008810573, "grad_norm": 2.916318649806586, "learning_rate": 1.204224821873887e-05, "loss": 0.718228816986084, "step": 2628 }, { "epoch": 0.9265198237885462, "grad_norm": 1.5801609410641386, "learning_rate": 1.2036544273597708e-05, "loss": 0.7385132312774658, "step": 2629 }, { "epoch": 0.9268722466960353, "grad_norm": 1.5320403236251587, "learning_rate": 1.203083963711315e-05, "loss": 0.7700635194778442, "step": 2630 }, { "epoch": 0.9272246696035242, "grad_norm": 1.4381703720368488, "learning_rate": 1.2025134311221732e-05, "loss": 0.8767666816711426, "step": 2631 }, { "epoch": 0.9275770925110132, "grad_norm": 1.4534975042510074, "learning_rate": 1.2019428297860241e-05, "loss": 0.6517986059188843, "step": 2632 }, { "epoch": 0.9279295154185022, "grad_norm": 1.3295910752440807, "learning_rate": 1.2013721598965688e-05, "loss": 0.5967941284179688, "step": 2633 }, { "epoch": 0.9282819383259912, "grad_norm": 2.085302745009741, "learning_rate": 1.2008014216475327e-05, "loss": 0.7480533123016357, "step": 2634 }, { "epoch": 0.9286343612334802, "grad_norm": 1.415633444981562, "learning_rate": 1.2002306152326626e-05, "loss": 0.8020488023757935, "step": 2635 }, { "epoch": 0.9289867841409691, "grad_norm": 1.235581839334599, "learning_rate": 1.1996597408457302e-05, "loss": 0.5535889863967896, "step": 2636 }, { "epoch": 0.9293392070484582, "grad_norm": 1.5093780754929471, "learning_rate": 1.1990887986805295e-05, "loss": 0.6888864040374756, "step": 2637 }, { "epoch": 0.9296916299559471, "grad_norm": 1.761723253773031, "learning_rate": 1.1985177889308777e-05, "loss": 0.7723515033721924, "step": 2638 }, { "epoch": 0.9300440528634362, "grad_norm": 2.4386861549294476, "learning_rate": 1.1979467117906143e-05, "loss": 0.6929488182067871, "step": 2639 }, { "epoch": 0.9303964757709251, "grad_norm": 1.7413716913523498, "learning_rate": 1.1973755674536027e-05, "loss": 0.7025216221809387, "step": 2640 }, { "epoch": 0.9307488986784141, "grad_norm": 1.5278537581621425, "learning_rate": 1.1968043561137287e-05, "loss": 0.6618740558624268, "step": 2641 }, { "epoch": 0.9311013215859031, "grad_norm": 1.3720349025623486, "learning_rate": 1.1962330779649002e-05, "loss": 0.5308352708816528, "step": 2642 }, { "epoch": 0.931453744493392, "grad_norm": 1.6043152610659495, "learning_rate": 1.1956617332010488e-05, "loss": 0.6559470891952515, "step": 2643 }, { "epoch": 0.9318061674008811, "grad_norm": 1.5758989244918422, "learning_rate": 1.1950903220161286e-05, "loss": 0.6572221517562866, "step": 2644 }, { "epoch": 0.93215859030837, "grad_norm": 1.7357943090474917, "learning_rate": 1.194518844604115e-05, "loss": 0.7854161262512207, "step": 2645 }, { "epoch": 0.932511013215859, "grad_norm": 1.555855365183626, "learning_rate": 1.1939473011590075e-05, "loss": 0.6471760869026184, "step": 2646 }, { "epoch": 0.932863436123348, "grad_norm": 1.5672890328663938, "learning_rate": 1.1933756918748271e-05, "loss": 0.6261184215545654, "step": 2647 }, { "epoch": 0.933215859030837, "grad_norm": 1.425764950800843, "learning_rate": 1.1928040169456176e-05, "loss": 0.6876180171966553, "step": 2648 }, { "epoch": 0.933568281938326, "grad_norm": 1.6203483271740744, "learning_rate": 1.1922322765654446e-05, "loss": 0.6782447099685669, "step": 2649 }, { "epoch": 0.933920704845815, "grad_norm": 1.640471126849017, "learning_rate": 1.1916604709283958e-05, "loss": 0.6085894107818604, "step": 2650 }, { "epoch": 0.934273127753304, "grad_norm": 1.6964969219798813, "learning_rate": 1.1910886002285822e-05, "loss": 0.6940577030181885, "step": 2651 }, { "epoch": 0.934625550660793, "grad_norm": 1.4704189591593113, "learning_rate": 1.1905166646601356e-05, "loss": 0.8204144239425659, "step": 2652 }, { "epoch": 0.9349779735682819, "grad_norm": 1.389489538033466, "learning_rate": 1.1899446644172106e-05, "loss": 0.6184309720993042, "step": 2653 }, { "epoch": 0.9353303964757709, "grad_norm": 2.1507675107714306, "learning_rate": 1.1893725996939831e-05, "loss": 0.7499250173568726, "step": 2654 }, { "epoch": 0.9356828193832599, "grad_norm": 1.739709417281562, "learning_rate": 1.1888004706846519e-05, "loss": 0.7021058797836304, "step": 2655 }, { "epoch": 0.9360352422907489, "grad_norm": 1.4311959050457856, "learning_rate": 1.188228277583436e-05, "loss": 0.6005666255950928, "step": 2656 }, { "epoch": 0.9363876651982379, "grad_norm": 1.4910024814198868, "learning_rate": 1.1876560205845782e-05, "loss": 0.6572481393814087, "step": 2657 }, { "epoch": 0.9367400881057268, "grad_norm": 1.5258435486694566, "learning_rate": 1.187083699882341e-05, "loss": 0.7402434349060059, "step": 2658 }, { "epoch": 0.9370925110132159, "grad_norm": 1.4352893489445113, "learning_rate": 1.1865113156710106e-05, "loss": 0.6693596243858337, "step": 2659 }, { "epoch": 0.9374449339207048, "grad_norm": 1.6704808140330663, "learning_rate": 1.1859388681448925e-05, "loss": 0.7708617448806763, "step": 2660 }, { "epoch": 0.9377973568281939, "grad_norm": 1.4245143913781195, "learning_rate": 1.1853663574983154e-05, "loss": 0.5871701836585999, "step": 2661 }, { "epoch": 0.9381497797356828, "grad_norm": 1.505716027406483, "learning_rate": 1.1847937839256287e-05, "loss": 0.6492994427680969, "step": 2662 }, { "epoch": 0.9385022026431719, "grad_norm": 1.3908643684674444, "learning_rate": 1.1842211476212038e-05, "loss": 0.6803429126739502, "step": 2663 }, { "epoch": 0.9388546255506608, "grad_norm": 1.5017846140199234, "learning_rate": 1.1836484487794318e-05, "loss": 0.5602244734764099, "step": 2664 }, { "epoch": 0.9392070484581497, "grad_norm": 1.2797636855685697, "learning_rate": 1.183075687594727e-05, "loss": 0.6562157869338989, "step": 2665 }, { "epoch": 0.9395594713656388, "grad_norm": 1.4855818018568143, "learning_rate": 1.182502864261524e-05, "loss": 0.71474289894104, "step": 2666 }, { "epoch": 0.9399118942731277, "grad_norm": 1.5995143445420303, "learning_rate": 1.1819299789742782e-05, "loss": 0.7130062580108643, "step": 2667 }, { "epoch": 0.9402643171806168, "grad_norm": 1.645740195320987, "learning_rate": 1.1813570319274663e-05, "loss": 0.788813054561615, "step": 2668 }, { "epoch": 0.9406167400881057, "grad_norm": 1.965041520497338, "learning_rate": 1.1807840233155863e-05, "loss": 0.6485022306442261, "step": 2669 }, { "epoch": 0.9409691629955947, "grad_norm": 1.6399057690578631, "learning_rate": 1.1802109533331562e-05, "loss": 0.4491521418094635, "step": 2670 }, { "epoch": 0.9413215859030837, "grad_norm": 1.6744760497066637, "learning_rate": 1.1796378221747162e-05, "loss": 0.6073683500289917, "step": 2671 }, { "epoch": 0.9416740088105727, "grad_norm": 1.859395754773969, "learning_rate": 1.179064630034826e-05, "loss": 0.5942971706390381, "step": 2672 }, { "epoch": 0.9420264317180617, "grad_norm": 1.4303169952284007, "learning_rate": 1.1784913771080667e-05, "loss": 0.7295013666152954, "step": 2673 }, { "epoch": 0.9423788546255507, "grad_norm": 1.8192026049611665, "learning_rate": 1.1779180635890394e-05, "loss": 0.7347372770309448, "step": 2674 }, { "epoch": 0.9427312775330396, "grad_norm": 1.5350977995485566, "learning_rate": 1.1773446896723668e-05, "loss": 0.5591942667961121, "step": 2675 }, { "epoch": 0.9430837004405286, "grad_norm": 1.5036340589436215, "learning_rate": 1.1767712555526911e-05, "loss": 0.822568953037262, "step": 2676 }, { "epoch": 0.9434361233480176, "grad_norm": 1.4619836017557306, "learning_rate": 1.1761977614246757e-05, "loss": 0.649920642375946, "step": 2677 }, { "epoch": 0.9437885462555066, "grad_norm": 1.4884584586985279, "learning_rate": 1.1756242074830036e-05, "loss": 0.6298861503601074, "step": 2678 }, { "epoch": 0.9441409691629956, "grad_norm": 1.6194483495779424, "learning_rate": 1.1750505939223787e-05, "loss": 0.81938636302948, "step": 2679 }, { "epoch": 0.9444933920704845, "grad_norm": 1.4751430048371623, "learning_rate": 1.1744769209375248e-05, "loss": 0.6627225875854492, "step": 2680 }, { "epoch": 0.9448458149779736, "grad_norm": 1.310837287475738, "learning_rate": 1.1739031887231864e-05, "loss": 0.6563318371772766, "step": 2681 }, { "epoch": 0.9451982378854625, "grad_norm": 1.3782616320804129, "learning_rate": 1.1733293974741273e-05, "loss": 0.5702694654464722, "step": 2682 }, { "epoch": 0.9455506607929516, "grad_norm": 1.5543579440741437, "learning_rate": 1.1727555473851321e-05, "loss": 0.685553789138794, "step": 2683 }, { "epoch": 0.9459030837004405, "grad_norm": 1.2085432227797441, "learning_rate": 1.172181638651005e-05, "loss": 0.6092622876167297, "step": 2684 }, { "epoch": 0.9462555066079296, "grad_norm": 2.0946243925185013, "learning_rate": 1.1716076714665701e-05, "loss": 0.6650614738464355, "step": 2685 }, { "epoch": 0.9466079295154185, "grad_norm": 1.6479809864443196, "learning_rate": 1.171033646026671e-05, "loss": 0.7665754556655884, "step": 2686 }, { "epoch": 0.9469603524229074, "grad_norm": 1.3199886923676785, "learning_rate": 1.1704595625261722e-05, "loss": 0.6365277171134949, "step": 2687 }, { "epoch": 0.9473127753303965, "grad_norm": 1.4825934002405374, "learning_rate": 1.1698854211599565e-05, "loss": 0.6622267961502075, "step": 2688 }, { "epoch": 0.9476651982378854, "grad_norm": 1.4519347010464663, "learning_rate": 1.1693112221229278e-05, "loss": 0.6636145710945129, "step": 2689 }, { "epoch": 0.9480176211453745, "grad_norm": 1.3381328445735352, "learning_rate": 1.168736965610008e-05, "loss": 0.6943212747573853, "step": 2690 }, { "epoch": 0.9483700440528634, "grad_norm": 1.5439836232478343, "learning_rate": 1.1681626518161397e-05, "loss": 0.7479512691497803, "step": 2691 }, { "epoch": 0.9487224669603525, "grad_norm": 1.5424571304173897, "learning_rate": 1.1675882809362846e-05, "loss": 0.7227041721343994, "step": 2692 }, { "epoch": 0.9490748898678414, "grad_norm": 1.3855049912904343, "learning_rate": 1.1670138531654238e-05, "loss": 0.7366166114807129, "step": 2693 }, { "epoch": 0.9494273127753304, "grad_norm": 1.634945701470733, "learning_rate": 1.1664393686985571e-05, "loss": 0.8634493350982666, "step": 2694 }, { "epoch": 0.9497797356828194, "grad_norm": 1.3102748532201536, "learning_rate": 1.165864827730705e-05, "loss": 0.5802862048149109, "step": 2695 }, { "epoch": 0.9501321585903084, "grad_norm": 1.571840947668404, "learning_rate": 1.1652902304569053e-05, "loss": 0.5931085348129272, "step": 2696 }, { "epoch": 0.9504845814977974, "grad_norm": 1.7175179856841813, "learning_rate": 1.164715577072217e-05, "loss": 0.7684508562088013, "step": 2697 }, { "epoch": 0.9508370044052863, "grad_norm": 1.6094834386500196, "learning_rate": 1.1641408677717158e-05, "loss": 0.94246906042099, "step": 2698 }, { "epoch": 0.9511894273127753, "grad_norm": 1.3999360216133725, "learning_rate": 1.1635661027504985e-05, "loss": 0.7072316408157349, "step": 2699 }, { "epoch": 0.9515418502202643, "grad_norm": 1.5926279454886292, "learning_rate": 1.16299128220368e-05, "loss": 0.5872572064399719, "step": 2700 }, { "epoch": 0.9518942731277533, "grad_norm": 1.4987885212929257, "learning_rate": 1.1624164063263931e-05, "loss": 0.6549060344696045, "step": 2701 }, { "epoch": 0.9522466960352423, "grad_norm": 1.6773153304869155, "learning_rate": 1.161841475313791e-05, "loss": 0.7338137626647949, "step": 2702 }, { "epoch": 0.9525991189427313, "grad_norm": 1.6523970676343225, "learning_rate": 1.161266489361045e-05, "loss": 0.6942911148071289, "step": 2703 }, { "epoch": 0.9529515418502202, "grad_norm": 2.037450532351288, "learning_rate": 1.1606914486633444e-05, "loss": 0.674375057220459, "step": 2704 }, { "epoch": 0.9533039647577093, "grad_norm": 1.6450610385875453, "learning_rate": 1.160116353415898e-05, "loss": 0.6790377497673035, "step": 2705 }, { "epoch": 0.9536563876651982, "grad_norm": 1.6724856793361191, "learning_rate": 1.1595412038139326e-05, "loss": 0.5902142524719238, "step": 2706 }, { "epoch": 0.9540088105726873, "grad_norm": 1.4286047469499437, "learning_rate": 1.1589660000526937e-05, "loss": 0.7034019231796265, "step": 2707 }, { "epoch": 0.9543612334801762, "grad_norm": 3.1062423334867106, "learning_rate": 1.158390742327445e-05, "loss": 0.6986846923828125, "step": 2708 }, { "epoch": 0.9547136563876651, "grad_norm": 1.8367783325674814, "learning_rate": 1.1578154308334683e-05, "loss": 0.6972544193267822, "step": 2709 }, { "epoch": 0.9550660792951542, "grad_norm": 1.3370474194561557, "learning_rate": 1.1572400657660646e-05, "loss": 0.6312702298164368, "step": 2710 }, { "epoch": 0.9554185022026431, "grad_norm": 1.7161015062577845, "learning_rate": 1.1566646473205518e-05, "loss": 0.7584360241889954, "step": 2711 }, { "epoch": 0.9557709251101322, "grad_norm": 1.256436023255263, "learning_rate": 1.156089175692267e-05, "loss": 0.700894296169281, "step": 2712 }, { "epoch": 0.9561233480176211, "grad_norm": 1.3257581819044393, "learning_rate": 1.1555136510765645e-05, "loss": 0.5637902617454529, "step": 2713 }, { "epoch": 0.9564757709251102, "grad_norm": 1.388319575976614, "learning_rate": 1.1549380736688173e-05, "loss": 0.4537314772605896, "step": 2714 }, { "epoch": 0.9568281938325991, "grad_norm": 1.8324279373886256, "learning_rate": 1.1543624436644161e-05, "loss": 0.7880423069000244, "step": 2715 }, { "epoch": 0.9571806167400881, "grad_norm": 1.6310441104063826, "learning_rate": 1.1537867612587692e-05, "loss": 0.7314344644546509, "step": 2716 }, { "epoch": 0.9575330396475771, "grad_norm": 1.7810937354544796, "learning_rate": 1.1532110266473026e-05, "loss": 0.9550024271011353, "step": 2717 }, { "epoch": 0.9578854625550661, "grad_norm": 1.3474455317445524, "learning_rate": 1.152635240025461e-05, "loss": 0.6482470035552979, "step": 2718 }, { "epoch": 0.9582378854625551, "grad_norm": 1.6637520992254753, "learning_rate": 1.152059401588705e-05, "loss": 0.6347365975379944, "step": 2719 }, { "epoch": 0.958590308370044, "grad_norm": 1.469780222161662, "learning_rate": 1.151483511532515e-05, "loss": 0.7214993238449097, "step": 2720 }, { "epoch": 0.958942731277533, "grad_norm": 1.4597118679681749, "learning_rate": 1.1509075700523869e-05, "loss": 0.6255312561988831, "step": 2721 }, { "epoch": 0.959295154185022, "grad_norm": 1.4735593911126945, "learning_rate": 1.1503315773438352e-05, "loss": 0.6152437925338745, "step": 2722 }, { "epoch": 0.959647577092511, "grad_norm": 1.8178378627357112, "learning_rate": 1.1497555336023916e-05, "loss": 0.6565401554107666, "step": 2723 }, { "epoch": 0.96, "grad_norm": 1.5268947365741583, "learning_rate": 1.1491794390236047e-05, "loss": 0.796178936958313, "step": 2724 }, { "epoch": 0.960352422907489, "grad_norm": 1.4289859748860345, "learning_rate": 1.1486032938030409e-05, "loss": 0.6243436336517334, "step": 2725 }, { "epoch": 0.960704845814978, "grad_norm": 3.1702620206811036, "learning_rate": 1.148027098136284e-05, "loss": 0.6043159365653992, "step": 2726 }, { "epoch": 0.961057268722467, "grad_norm": 2.2643023721896554, "learning_rate": 1.1474508522189334e-05, "loss": 0.7268002033233643, "step": 2727 }, { "epoch": 0.9614096916299559, "grad_norm": 1.6105062692265093, "learning_rate": 1.1468745562466076e-05, "loss": 0.6156840324401855, "step": 2728 }, { "epoch": 0.961762114537445, "grad_norm": 1.3602355982897767, "learning_rate": 1.1462982104149409e-05, "loss": 0.8415796756744385, "step": 2729 }, { "epoch": 0.9621145374449339, "grad_norm": 1.7603646172978014, "learning_rate": 1.145721814919585e-05, "loss": 0.5983521342277527, "step": 2730 }, { "epoch": 0.962466960352423, "grad_norm": 1.6358592349658665, "learning_rate": 1.1451453699562077e-05, "loss": 0.6144511699676514, "step": 2731 }, { "epoch": 0.9628193832599119, "grad_norm": 1.66844617820458, "learning_rate": 1.1445688757204942e-05, "loss": 0.6449630260467529, "step": 2732 }, { "epoch": 0.9631718061674008, "grad_norm": 1.5343236560799753, "learning_rate": 1.1439923324081465e-05, "loss": 0.7321716547012329, "step": 2733 }, { "epoch": 0.9635242290748899, "grad_norm": 1.9877317345810759, "learning_rate": 1.1434157402148838e-05, "loss": 0.8354923129081726, "step": 2734 }, { "epoch": 0.9638766519823788, "grad_norm": 1.3653549857555707, "learning_rate": 1.14283909933644e-05, "loss": 0.728820264339447, "step": 2735 }, { "epoch": 0.9642290748898679, "grad_norm": 1.4013626479373464, "learning_rate": 1.1422624099685675e-05, "loss": 0.6683202981948853, "step": 2736 }, { "epoch": 0.9645814977973568, "grad_norm": 1.6203635868462385, "learning_rate": 1.141685672307034e-05, "loss": 0.7159590125083923, "step": 2737 }, { "epoch": 0.9649339207048458, "grad_norm": 1.9197883933040156, "learning_rate": 1.1411088865476245e-05, "loss": 0.8269981145858765, "step": 2738 }, { "epoch": 0.9652863436123348, "grad_norm": 1.7561037821195844, "learning_rate": 1.1405320528861393e-05, "loss": 0.6993168592453003, "step": 2739 }, { "epoch": 0.9656387665198238, "grad_norm": 1.4700171152077626, "learning_rate": 1.1399551715183956e-05, "loss": 0.6296184062957764, "step": 2740 }, { "epoch": 0.9659911894273128, "grad_norm": 1.5505746175576802, "learning_rate": 1.1393782426402267e-05, "loss": 0.670283317565918, "step": 2741 }, { "epoch": 0.9663436123348018, "grad_norm": 1.6125051339337373, "learning_rate": 1.1388012664474824e-05, "loss": 0.9248946905136108, "step": 2742 }, { "epoch": 0.9666960352422908, "grad_norm": 1.7027770081175677, "learning_rate": 1.1382242431360272e-05, "loss": 0.7965992093086243, "step": 2743 }, { "epoch": 0.9670484581497797, "grad_norm": 1.6413263453773168, "learning_rate": 1.1376471729017435e-05, "loss": 0.632454514503479, "step": 2744 }, { "epoch": 0.9674008810572687, "grad_norm": 1.4364322830343181, "learning_rate": 1.1370700559405283e-05, "loss": 0.6463649272918701, "step": 2745 }, { "epoch": 0.9677533039647577, "grad_norm": 1.5890798975591325, "learning_rate": 1.1364928924482952e-05, "loss": 0.5864677429199219, "step": 2746 }, { "epoch": 0.9681057268722467, "grad_norm": 1.5090045708209912, "learning_rate": 1.1359156826209726e-05, "loss": 0.6313967108726501, "step": 2747 }, { "epoch": 0.9684581497797357, "grad_norm": 1.2634359711899723, "learning_rate": 1.1353384266545056e-05, "loss": 0.5736903548240662, "step": 2748 }, { "epoch": 0.9688105726872247, "grad_norm": 1.3956693120918684, "learning_rate": 1.1347611247448544e-05, "loss": 0.672286868095398, "step": 2749 }, { "epoch": 0.9691629955947136, "grad_norm": 1.7905269273993527, "learning_rate": 1.1341837770879957e-05, "loss": 0.7181379795074463, "step": 2750 }, { "epoch": 0.9695154185022027, "grad_norm": 1.3192307426609728, "learning_rate": 1.1336063838799204e-05, "loss": 0.6160816550254822, "step": 2751 }, { "epoch": 0.9698678414096916, "grad_norm": 1.3858752821091025, "learning_rate": 1.1330289453166361e-05, "loss": 0.737337589263916, "step": 2752 }, { "epoch": 0.9702202643171807, "grad_norm": 1.4067461052680075, "learning_rate": 1.1324514615941644e-05, "loss": 0.6752150058746338, "step": 2753 }, { "epoch": 0.9705726872246696, "grad_norm": 1.502210352579975, "learning_rate": 1.1318739329085438e-05, "loss": 0.6917784214019775, "step": 2754 }, { "epoch": 0.9709251101321585, "grad_norm": 1.873477988490531, "learning_rate": 1.131296359455827e-05, "loss": 0.7863353490829468, "step": 2755 }, { "epoch": 0.9712775330396476, "grad_norm": 1.338648959960645, "learning_rate": 1.1307187414320823e-05, "loss": 0.6236519813537598, "step": 2756 }, { "epoch": 0.9716299559471365, "grad_norm": 1.443196389025093, "learning_rate": 1.130141079033393e-05, "loss": 0.6957560181617737, "step": 2757 }, { "epoch": 0.9719823788546256, "grad_norm": 1.6687230505642796, "learning_rate": 1.1295633724558574e-05, "loss": 0.6460270881652832, "step": 2758 }, { "epoch": 0.9723348017621145, "grad_norm": 1.4575621917812085, "learning_rate": 1.1289856218955892e-05, "loss": 0.7352741956710815, "step": 2759 }, { "epoch": 0.9726872246696036, "grad_norm": 1.7999835448567072, "learning_rate": 1.1284078275487165e-05, "loss": 0.6285911798477173, "step": 2760 }, { "epoch": 0.9730396475770925, "grad_norm": 1.4280819376163427, "learning_rate": 1.1278299896113823e-05, "loss": 0.6577984094619751, "step": 2761 }, { "epoch": 0.9733920704845815, "grad_norm": 1.4424142490511096, "learning_rate": 1.1272521082797452e-05, "loss": 0.6445770859718323, "step": 2762 }, { "epoch": 0.9737444933920705, "grad_norm": 1.3911141072298185, "learning_rate": 1.1266741837499773e-05, "loss": 0.557687520980835, "step": 2763 }, { "epoch": 0.9740969162995595, "grad_norm": 1.559776829553993, "learning_rate": 1.1260962162182664e-05, "loss": 0.6117650866508484, "step": 2764 }, { "epoch": 0.9744493392070485, "grad_norm": 1.4751836492364416, "learning_rate": 1.1255182058808143e-05, "loss": 0.6498113870620728, "step": 2765 }, { "epoch": 0.9748017621145374, "grad_norm": 1.9707928584824135, "learning_rate": 1.1249401529338375e-05, "loss": 0.8738062381744385, "step": 2766 }, { "epoch": 0.9751541850220264, "grad_norm": 1.6389865398372674, "learning_rate": 1.1243620575735672e-05, "loss": 0.551408052444458, "step": 2767 }, { "epoch": 0.9755066079295154, "grad_norm": 1.645802380531443, "learning_rate": 1.1237839199962488e-05, "loss": 0.7197355031967163, "step": 2768 }, { "epoch": 0.9758590308370044, "grad_norm": 1.5393826706252047, "learning_rate": 1.1232057403981415e-05, "loss": 0.5704015493392944, "step": 2769 }, { "epoch": 0.9762114537444934, "grad_norm": 1.373872634740153, "learning_rate": 1.1226275189755199e-05, "loss": 0.603929877281189, "step": 2770 }, { "epoch": 0.9765638766519824, "grad_norm": 1.731229349756288, "learning_rate": 1.1220492559246719e-05, "loss": 0.8652673363685608, "step": 2771 }, { "epoch": 0.9769162995594713, "grad_norm": 1.5891679358388853, "learning_rate": 1.1214709514418998e-05, "loss": 0.6827684044837952, "step": 2772 }, { "epoch": 0.9772687224669604, "grad_norm": 1.3323036683469254, "learning_rate": 1.1208926057235197e-05, "loss": 0.5584808588027954, "step": 2773 }, { "epoch": 0.9776211453744493, "grad_norm": 1.5495557729443614, "learning_rate": 1.1203142189658627e-05, "loss": 0.7242820262908936, "step": 2774 }, { "epoch": 0.9779735682819384, "grad_norm": 1.3489108616226997, "learning_rate": 1.1197357913652725e-05, "loss": 0.5299571752548218, "step": 2775 }, { "epoch": 0.9783259911894273, "grad_norm": 1.8541326435971137, "learning_rate": 1.1191573231181074e-05, "loss": 0.69478440284729, "step": 2776 }, { "epoch": 0.9786784140969162, "grad_norm": 1.540885425711554, "learning_rate": 1.1185788144207394e-05, "loss": 0.6997090578079224, "step": 2777 }, { "epoch": 0.9790308370044053, "grad_norm": 1.422432956680528, "learning_rate": 1.1180002654695543e-05, "loss": 0.6882679462432861, "step": 2778 }, { "epoch": 0.9793832599118942, "grad_norm": 1.5811365233101125, "learning_rate": 1.1174216764609514e-05, "loss": 0.6434916257858276, "step": 2779 }, { "epoch": 0.9797356828193833, "grad_norm": 1.5811226707061032, "learning_rate": 1.1168430475913437e-05, "loss": 0.6614376902580261, "step": 2780 }, { "epoch": 0.9800881057268722, "grad_norm": 1.380437766979243, "learning_rate": 1.1162643790571574e-05, "loss": 0.6440471410751343, "step": 2781 }, { "epoch": 0.9804405286343613, "grad_norm": 1.6997398594970703, "learning_rate": 1.1156856710548327e-05, "loss": 0.6493573188781738, "step": 2782 }, { "epoch": 0.9807929515418502, "grad_norm": 1.5246321952125226, "learning_rate": 1.1151069237808231e-05, "loss": 0.660174548625946, "step": 2783 }, { "epoch": 0.9811453744493392, "grad_norm": 1.7392611870715098, "learning_rate": 1.1145281374315953e-05, "loss": 0.8041812181472778, "step": 2784 }, { "epoch": 0.9814977973568282, "grad_norm": 1.3479949919135392, "learning_rate": 1.1139493122036289e-05, "loss": 0.4758625030517578, "step": 2785 }, { "epoch": 0.9818502202643172, "grad_norm": 1.6334305751982239, "learning_rate": 1.113370448293417e-05, "loss": 0.6482613682746887, "step": 2786 }, { "epoch": 0.9822026431718062, "grad_norm": 1.475447708954463, "learning_rate": 1.1127915458974665e-05, "loss": 0.6911569237709045, "step": 2787 }, { "epoch": 0.9825550660792951, "grad_norm": 1.362340888945518, "learning_rate": 1.1122126052122963e-05, "loss": 0.6851824522018433, "step": 2788 }, { "epoch": 0.9829074889867842, "grad_norm": 1.5792587066367831, "learning_rate": 1.111633626434439e-05, "loss": 0.6405081748962402, "step": 2789 }, { "epoch": 0.9832599118942731, "grad_norm": 1.5781550908818451, "learning_rate": 1.1110546097604391e-05, "loss": 0.7064476013183594, "step": 2790 }, { "epoch": 0.9836123348017621, "grad_norm": 1.4647903320195184, "learning_rate": 1.1104755553868559e-05, "loss": 0.641350269317627, "step": 2791 }, { "epoch": 0.9839647577092511, "grad_norm": 1.4142953897430577, "learning_rate": 1.1098964635102597e-05, "loss": 0.748977780342102, "step": 2792 }, { "epoch": 0.9843171806167401, "grad_norm": 1.3989289975006294, "learning_rate": 1.1093173343272342e-05, "loss": 0.6033440828323364, "step": 2793 }, { "epoch": 0.984669603524229, "grad_norm": 1.2877663440814373, "learning_rate": 1.1087381680343754e-05, "loss": 0.5684633255004883, "step": 2794 }, { "epoch": 0.9850220264317181, "grad_norm": 1.5189384787980884, "learning_rate": 1.1081589648282928e-05, "loss": 0.7041289210319519, "step": 2795 }, { "epoch": 0.985374449339207, "grad_norm": 1.5616342989862266, "learning_rate": 1.1075797249056079e-05, "loss": 0.7189786434173584, "step": 2796 }, { "epoch": 0.9857268722466961, "grad_norm": 1.534620191791425, "learning_rate": 1.1070004484629543e-05, "loss": 0.5114344358444214, "step": 2797 }, { "epoch": 0.986079295154185, "grad_norm": 1.6541092784437663, "learning_rate": 1.1064211356969782e-05, "loss": 0.5897136926651001, "step": 2798 }, { "epoch": 0.986431718061674, "grad_norm": 1.5980123151797752, "learning_rate": 1.1058417868043387e-05, "loss": 0.8490760326385498, "step": 2799 }, { "epoch": 0.986784140969163, "grad_norm": 1.5100542298165633, "learning_rate": 1.1052624019817065e-05, "loss": 0.6392524242401123, "step": 2800 }, { "epoch": 0.9871365638766519, "grad_norm": 1.5630522519900902, "learning_rate": 1.104682981425765e-05, "loss": 0.7267303466796875, "step": 2801 }, { "epoch": 0.987488986784141, "grad_norm": 1.5413815660334662, "learning_rate": 1.1041035253332087e-05, "loss": 0.6622469425201416, "step": 2802 }, { "epoch": 0.9878414096916299, "grad_norm": 1.4547931829788883, "learning_rate": 1.1035240339007454e-05, "loss": 0.643883466720581, "step": 2803 }, { "epoch": 0.988193832599119, "grad_norm": 1.4919310534649226, "learning_rate": 1.1029445073250945e-05, "loss": 0.6281142234802246, "step": 2804 }, { "epoch": 0.9885462555066079, "grad_norm": 1.606048707782168, "learning_rate": 1.1023649458029873e-05, "loss": 0.6356241703033447, "step": 2805 }, { "epoch": 0.988898678414097, "grad_norm": 1.7018688321982895, "learning_rate": 1.1017853495311664e-05, "loss": 0.8118115663528442, "step": 2806 }, { "epoch": 0.9892511013215859, "grad_norm": 1.4779776881835476, "learning_rate": 1.1012057187063872e-05, "loss": 0.7673395276069641, "step": 2807 }, { "epoch": 0.9896035242290749, "grad_norm": 1.5158382122898324, "learning_rate": 1.1006260535254159e-05, "loss": 0.6617262959480286, "step": 2808 }, { "epoch": 0.9899559471365639, "grad_norm": 1.7342419352159402, "learning_rate": 1.1000463541850315e-05, "loss": 0.537519097328186, "step": 2809 }, { "epoch": 0.9903083700440528, "grad_norm": 1.8093297060046025, "learning_rate": 1.0994666208820229e-05, "loss": 0.6281024813652039, "step": 2810 }, { "epoch": 0.9906607929515419, "grad_norm": 1.4111971416204439, "learning_rate": 1.0988868538131922e-05, "loss": 0.7189136743545532, "step": 2811 }, { "epoch": 0.9910132158590308, "grad_norm": 1.3844162550962045, "learning_rate": 1.098307053175352e-05, "loss": 0.622093677520752, "step": 2812 }, { "epoch": 0.9913656387665198, "grad_norm": 1.4032650881900075, "learning_rate": 1.0977272191653272e-05, "loss": 0.6774802207946777, "step": 2813 }, { "epoch": 0.9917180616740088, "grad_norm": 1.490303383982121, "learning_rate": 1.0971473519799523e-05, "loss": 0.5999646186828613, "step": 2814 }, { "epoch": 0.9920704845814978, "grad_norm": 1.3508886274303966, "learning_rate": 1.096567451816075e-05, "loss": 0.6450619697570801, "step": 2815 }, { "epoch": 0.9924229074889868, "grad_norm": 1.8693455627252262, "learning_rate": 1.0959875188705529e-05, "loss": 0.693134069442749, "step": 2816 }, { "epoch": 0.9927753303964758, "grad_norm": 1.744167199385734, "learning_rate": 1.0954075533402557e-05, "loss": 0.8968616724014282, "step": 2817 }, { "epoch": 0.9931277533039647, "grad_norm": 1.5750441805034816, "learning_rate": 1.0948275554220632e-05, "loss": 0.6114391088485718, "step": 2818 }, { "epoch": 0.9934801762114538, "grad_norm": 1.3761860122661305, "learning_rate": 1.0942475253128667e-05, "loss": 0.7583796977996826, "step": 2819 }, { "epoch": 0.9938325991189427, "grad_norm": 2.0494911253957735, "learning_rate": 1.0936674632095683e-05, "loss": 0.5683549046516418, "step": 2820 }, { "epoch": 0.9941850220264317, "grad_norm": 1.4100630352107084, "learning_rate": 1.0930873693090815e-05, "loss": 0.5664689540863037, "step": 2821 }, { "epoch": 0.9945374449339207, "grad_norm": 1.1859055454278844, "learning_rate": 1.0925072438083296e-05, "loss": 0.5799476504325867, "step": 2822 }, { "epoch": 0.9948898678414096, "grad_norm": 1.4558284543811444, "learning_rate": 1.0919270869042475e-05, "loss": 0.6879112720489502, "step": 2823 }, { "epoch": 0.9952422907488987, "grad_norm": 1.3673096151886848, "learning_rate": 1.09134689879378e-05, "loss": 0.6348927021026611, "step": 2824 }, { "epoch": 0.9955947136563876, "grad_norm": 1.5301215006310536, "learning_rate": 1.0907666796738839e-05, "loss": 0.55754554271698, "step": 2825 }, { "epoch": 0.9959471365638767, "grad_norm": 1.6611255848189581, "learning_rate": 1.090186429741524e-05, "loss": 0.6664899587631226, "step": 2826 }, { "epoch": 0.9962995594713656, "grad_norm": 1.3580224067934683, "learning_rate": 1.0896061491936782e-05, "loss": 0.6521929502487183, "step": 2827 }, { "epoch": 0.9966519823788547, "grad_norm": 1.4217882734660863, "learning_rate": 1.0890258382273333e-05, "loss": 0.542471170425415, "step": 2828 }, { "epoch": 0.9970044052863436, "grad_norm": 1.3242120868836005, "learning_rate": 1.0884454970394871e-05, "loss": 0.60117506980896, "step": 2829 }, { "epoch": 0.9973568281938326, "grad_norm": 1.5563969946549858, "learning_rate": 1.0878651258271471e-05, "loss": 0.6783676147460938, "step": 2830 }, { "epoch": 0.9977092511013216, "grad_norm": 1.4867095260992749, "learning_rate": 1.0872847247873315e-05, "loss": 0.7080766558647156, "step": 2831 }, { "epoch": 0.9980616740088105, "grad_norm": 1.7595047000981443, "learning_rate": 1.0867042941170677e-05, "loss": 0.9228106141090393, "step": 2832 }, { "epoch": 0.9984140969162996, "grad_norm": 1.749212162747955, "learning_rate": 1.086123834013395e-05, "loss": 0.7601282596588135, "step": 2833 }, { "epoch": 0.9987665198237885, "grad_norm": 1.388473564306277, "learning_rate": 1.0855433446733607e-05, "loss": 0.7101393342018127, "step": 2834 }, { "epoch": 0.9991189427312775, "grad_norm": 1.426665891638417, "learning_rate": 1.084962826294023e-05, "loss": 0.5006242394447327, "step": 2835 }, { "epoch": 0.9994713656387665, "grad_norm": 1.6063601330711992, "learning_rate": 1.08438227907245e-05, "loss": 0.7270148992538452, "step": 2836 }, { "epoch": 0.9998237885462555, "grad_norm": 1.5770914971205114, "learning_rate": 1.0838017032057194e-05, "loss": 0.7252628803253174, "step": 2837 }, { "epoch": 1.0, "grad_norm": 2.9062070384731578, "learning_rate": 1.0832210988909187e-05, "loss": 0.4579252004623413, "step": 2838 }, { "epoch": 1.000352422907489, "grad_norm": 1.410073366222354, "learning_rate": 1.0826404663251446e-05, "loss": 0.635676920413971, "step": 2839 }, { "epoch": 1.0007048458149779, "grad_norm": 1.5085425099131595, "learning_rate": 1.0820598057055039e-05, "loss": 0.6083015203475952, "step": 2840 }, { "epoch": 1.001057268722467, "grad_norm": 1.2571881093552235, "learning_rate": 1.0814791172291132e-05, "loss": 0.5641704797744751, "step": 2841 }, { "epoch": 1.001409691629956, "grad_norm": 1.448254627835315, "learning_rate": 1.0808984010930981e-05, "loss": 0.7668559551239014, "step": 2842 }, { "epoch": 1.001762114537445, "grad_norm": 1.7836674103878665, "learning_rate": 1.0803176574945933e-05, "loss": 0.5205796957015991, "step": 2843 }, { "epoch": 1.0021145374449338, "grad_norm": 1.2460568970106132, "learning_rate": 1.0797368866307431e-05, "loss": 0.6771252155303955, "step": 2844 }, { "epoch": 1.002466960352423, "grad_norm": 1.3246167691239887, "learning_rate": 1.0791560886987016e-05, "loss": 0.6101677417755127, "step": 2845 }, { "epoch": 1.002819383259912, "grad_norm": 1.683370422985012, "learning_rate": 1.0785752638956315e-05, "loss": 0.5651522874832153, "step": 2846 }, { "epoch": 1.0031718061674009, "grad_norm": 1.3543139981801942, "learning_rate": 1.0779944124187048e-05, "loss": 0.6814571619033813, "step": 2847 }, { "epoch": 1.0035242290748898, "grad_norm": 1.5579116379809095, "learning_rate": 1.0774135344651023e-05, "loss": 0.6786171197891235, "step": 2848 }, { "epoch": 1.003876651982379, "grad_norm": 1.341282658364188, "learning_rate": 1.0768326302320136e-05, "loss": 0.5244907736778259, "step": 2849 }, { "epoch": 1.004229074889868, "grad_norm": 1.5100504884551087, "learning_rate": 1.0762516999166383e-05, "loss": 0.6368712186813354, "step": 2850 }, { "epoch": 1.0045814977973568, "grad_norm": 1.3929085404961679, "learning_rate": 1.0756707437161841e-05, "loss": 0.6389411687850952, "step": 2851 }, { "epoch": 1.0049339207048458, "grad_norm": 1.796913818431425, "learning_rate": 1.0750897618278675e-05, "loss": 0.6257550716400146, "step": 2852 }, { "epoch": 1.0052863436123347, "grad_norm": 1.384078231158131, "learning_rate": 1.0745087544489132e-05, "loss": 0.49478042125701904, "step": 2853 }, { "epoch": 1.0056387665198239, "grad_norm": 1.3713236142324383, "learning_rate": 1.0739277217765558e-05, "loss": 0.6350952386856079, "step": 2854 }, { "epoch": 1.0059911894273128, "grad_norm": 1.4287669419061304, "learning_rate": 1.0733466640080374e-05, "loss": 0.6057480573654175, "step": 2855 }, { "epoch": 1.0063436123348017, "grad_norm": 1.5646694084149986, "learning_rate": 1.0727655813406094e-05, "loss": 0.5545427799224854, "step": 2856 }, { "epoch": 1.0066960352422907, "grad_norm": 1.371726691889951, "learning_rate": 1.0721844739715311e-05, "loss": 0.55484938621521, "step": 2857 }, { "epoch": 1.0070484581497798, "grad_norm": 1.6325523903522516, "learning_rate": 1.0716033420980703e-05, "loss": 0.6889834403991699, "step": 2858 }, { "epoch": 1.0074008810572688, "grad_norm": 1.928061303452338, "learning_rate": 1.0710221859175031e-05, "loss": 0.7259023189544678, "step": 2859 }, { "epoch": 1.0077533039647577, "grad_norm": 1.7213820381224034, "learning_rate": 1.0704410056271144e-05, "loss": 0.6200032234191895, "step": 2860 }, { "epoch": 1.0081057268722466, "grad_norm": 1.2488919699208767, "learning_rate": 1.069859801424196e-05, "loss": 0.5357909202575684, "step": 2861 }, { "epoch": 1.0084581497797356, "grad_norm": 1.462725629247434, "learning_rate": 1.0692785735060495e-05, "loss": 0.8121966123580933, "step": 2862 }, { "epoch": 1.0088105726872247, "grad_norm": 1.5047486906511685, "learning_rate": 1.0686973220699834e-05, "loss": 0.5698819160461426, "step": 2863 }, { "epoch": 1.0091629955947137, "grad_norm": 1.3352019656375154, "learning_rate": 1.0681160473133144e-05, "loss": 0.6598206162452698, "step": 2864 }, { "epoch": 1.0095154185022026, "grad_norm": 1.571854196128042, "learning_rate": 1.0675347494333667e-05, "loss": 0.7574363946914673, "step": 2865 }, { "epoch": 1.0098678414096915, "grad_norm": 2.0265508752029007, "learning_rate": 1.0669534286274737e-05, "loss": 0.6749663949012756, "step": 2866 }, { "epoch": 1.0102202643171807, "grad_norm": 1.5445692097493786, "learning_rate": 1.0663720850929753e-05, "loss": 0.5932409763336182, "step": 2867 }, { "epoch": 1.0105726872246696, "grad_norm": 1.4883467064779885, "learning_rate": 1.0657907190272197e-05, "loss": 0.7070773839950562, "step": 2868 }, { "epoch": 1.0109251101321586, "grad_norm": 1.6639794076635466, "learning_rate": 1.0652093306275621e-05, "loss": 0.531635582447052, "step": 2869 }, { "epoch": 1.0112775330396475, "grad_norm": 1.5967103256398283, "learning_rate": 1.0646279200913665e-05, "loss": 0.5966447591781616, "step": 2870 }, { "epoch": 1.0116299559471367, "grad_norm": 1.5047477869564347, "learning_rate": 1.0640464876160033e-05, "loss": 0.6308450698852539, "step": 2871 }, { "epoch": 1.0119823788546256, "grad_norm": 1.6938927429813924, "learning_rate": 1.0634650333988508e-05, "loss": 0.6477035284042358, "step": 2872 }, { "epoch": 1.0123348017621145, "grad_norm": 1.4725648899614407, "learning_rate": 1.0628835576372942e-05, "loss": 0.5856079459190369, "step": 2873 }, { "epoch": 1.0126872246696035, "grad_norm": 1.6415031005435194, "learning_rate": 1.062302060528727e-05, "loss": 0.733691930770874, "step": 2874 }, { "epoch": 1.0130396475770924, "grad_norm": 1.6528326658043055, "learning_rate": 1.0617205422705495e-05, "loss": 0.6020156145095825, "step": 2875 }, { "epoch": 1.0133920704845816, "grad_norm": 1.5978613503890422, "learning_rate": 1.0611390030601685e-05, "loss": 0.4980982542037964, "step": 2876 }, { "epoch": 1.0137444933920705, "grad_norm": 1.5178573200522583, "learning_rate": 1.0605574430949983e-05, "loss": 0.6498349905014038, "step": 2877 }, { "epoch": 1.0140969162995594, "grad_norm": 1.7318519084472541, "learning_rate": 1.0599758625724612e-05, "loss": 0.6456383466720581, "step": 2878 }, { "epoch": 1.0144493392070484, "grad_norm": 1.7056738628689527, "learning_rate": 1.059394261689985e-05, "loss": 0.6047386527061462, "step": 2879 }, { "epoch": 1.0148017621145375, "grad_norm": 1.6633316847391189, "learning_rate": 1.0588126406450056e-05, "loss": 0.641674816608429, "step": 2880 }, { "epoch": 1.0151541850220265, "grad_norm": 1.549495353719679, "learning_rate": 1.0582309996349648e-05, "loss": 0.6157702207565308, "step": 2881 }, { "epoch": 1.0155066079295154, "grad_norm": 1.614686141937513, "learning_rate": 1.057649338857312e-05, "loss": 0.6004809737205505, "step": 2882 }, { "epoch": 1.0158590308370044, "grad_norm": 1.460588924951717, "learning_rate": 1.0570676585095028e-05, "loss": 0.5534430742263794, "step": 2883 }, { "epoch": 1.0162114537444933, "grad_norm": 2.0058626486485367, "learning_rate": 1.0564859587889997e-05, "loss": 0.7781813144683838, "step": 2884 }, { "epoch": 1.0165638766519824, "grad_norm": 1.9228872779765243, "learning_rate": 1.0559042398932713e-05, "loss": 0.6949760913848877, "step": 2885 }, { "epoch": 1.0169162995594714, "grad_norm": 1.51396598780538, "learning_rate": 1.0553225020197932e-05, "loss": 0.5718453526496887, "step": 2886 }, { "epoch": 1.0172687224669603, "grad_norm": 1.7835909963123882, "learning_rate": 1.0547407453660471e-05, "loss": 0.6689345836639404, "step": 2887 }, { "epoch": 1.0176211453744493, "grad_norm": 1.5559332596209525, "learning_rate": 1.0541589701295222e-05, "loss": 0.6615442037582397, "step": 2888 }, { "epoch": 1.0179735682819384, "grad_norm": 1.4810070180145358, "learning_rate": 1.0535771765077121e-05, "loss": 0.6458337306976318, "step": 2889 }, { "epoch": 1.0183259911894273, "grad_norm": 1.4770072284014752, "learning_rate": 1.052995364698118e-05, "loss": 0.5330519676208496, "step": 2890 }, { "epoch": 1.0186784140969163, "grad_norm": 1.4780636522187705, "learning_rate": 1.0524135348982467e-05, "loss": 0.6219571232795715, "step": 2891 }, { "epoch": 1.0190308370044052, "grad_norm": 1.4624191661889683, "learning_rate": 1.0518316873056118e-05, "loss": 0.6731684803962708, "step": 2892 }, { "epoch": 1.0193832599118944, "grad_norm": 1.614741871357758, "learning_rate": 1.0512498221177319e-05, "loss": 0.6126813888549805, "step": 2893 }, { "epoch": 1.0197356828193833, "grad_norm": 1.4895494518265573, "learning_rate": 1.0506679395321325e-05, "loss": 0.5796904563903809, "step": 2894 }, { "epoch": 1.0200881057268723, "grad_norm": 1.5545739969005041, "learning_rate": 1.050086039746344e-05, "loss": 0.5765914916992188, "step": 2895 }, { "epoch": 1.0204405286343612, "grad_norm": 1.3710954206781227, "learning_rate": 1.0495041229579043e-05, "loss": 0.4798969328403473, "step": 2896 }, { "epoch": 1.0207929515418501, "grad_norm": 1.551476741605498, "learning_rate": 1.0489221893643553e-05, "loss": 0.673927366733551, "step": 2897 }, { "epoch": 1.0211453744493393, "grad_norm": 1.6211129054938926, "learning_rate": 1.0483402391632453e-05, "loss": 0.5681431293487549, "step": 2898 }, { "epoch": 1.0214977973568282, "grad_norm": 1.3128793329209902, "learning_rate": 1.0477582725521287e-05, "loss": 0.6156354546546936, "step": 2899 }, { "epoch": 1.0218502202643172, "grad_norm": 1.4369078255379546, "learning_rate": 1.0471762897285652e-05, "loss": 0.6569045782089233, "step": 2900 }, { "epoch": 1.022202643171806, "grad_norm": 1.4293089736412674, "learning_rate": 1.046594290890119e-05, "loss": 0.6125048995018005, "step": 2901 }, { "epoch": 1.0225550660792952, "grad_norm": 1.6465466140905431, "learning_rate": 1.0460122762343614e-05, "loss": 0.604046106338501, "step": 2902 }, { "epoch": 1.0229074889867842, "grad_norm": 1.5461286198100506, "learning_rate": 1.0454302459588677e-05, "loss": 0.4569816589355469, "step": 2903 }, { "epoch": 1.0232599118942731, "grad_norm": 1.6187784923192434, "learning_rate": 1.0448482002612194e-05, "loss": 0.5764607787132263, "step": 2904 }, { "epoch": 1.023612334801762, "grad_norm": 1.503585291483294, "learning_rate": 1.044266139339003e-05, "loss": 0.5859626531600952, "step": 2905 }, { "epoch": 1.023964757709251, "grad_norm": 1.6642769825669268, "learning_rate": 1.04368406338981e-05, "loss": 0.7326341271400452, "step": 2906 }, { "epoch": 1.0243171806167402, "grad_norm": 1.613324765385094, "learning_rate": 1.0431019726112366e-05, "loss": 0.6355161070823669, "step": 2907 }, { "epoch": 1.024669603524229, "grad_norm": 1.5833367942965741, "learning_rate": 1.0425198672008851e-05, "loss": 0.6990653872489929, "step": 2908 }, { "epoch": 1.025022026431718, "grad_norm": 2.3098262824716542, "learning_rate": 1.0419377473563621e-05, "loss": 0.631952166557312, "step": 2909 }, { "epoch": 1.025374449339207, "grad_norm": 1.4397039525414863, "learning_rate": 1.041355613275279e-05, "loss": 0.4872596561908722, "step": 2910 }, { "epoch": 1.0257268722466961, "grad_norm": 1.5222931253330352, "learning_rate": 1.0407734651552522e-05, "loss": 0.5334043502807617, "step": 2911 }, { "epoch": 1.026079295154185, "grad_norm": 1.5817730675020623, "learning_rate": 1.0401913031939026e-05, "loss": 0.5971134305000305, "step": 2912 }, { "epoch": 1.026431718061674, "grad_norm": 1.7562208471394358, "learning_rate": 1.0396091275888567e-05, "loss": 0.6527851819992065, "step": 2913 }, { "epoch": 1.026784140969163, "grad_norm": 1.5387477454353993, "learning_rate": 1.0390269385377444e-05, "loss": 0.4515818953514099, "step": 2914 }, { "epoch": 1.027136563876652, "grad_norm": 1.4624804092376522, "learning_rate": 1.0384447362382013e-05, "loss": 0.530797004699707, "step": 2915 }, { "epoch": 1.027488986784141, "grad_norm": 1.4915704465108583, "learning_rate": 1.0378625208878666e-05, "loss": 0.5477641224861145, "step": 2916 }, { "epoch": 1.02784140969163, "grad_norm": 1.6025052451883606, "learning_rate": 1.0372802926843843e-05, "loss": 0.6390479207038879, "step": 2917 }, { "epoch": 1.028193832599119, "grad_norm": 1.5706073153963707, "learning_rate": 1.0366980518254028e-05, "loss": 0.610755443572998, "step": 2918 }, { "epoch": 1.0285462555066078, "grad_norm": 1.4805888577219812, "learning_rate": 1.036115798508575e-05, "loss": 0.5427766442298889, "step": 2919 }, { "epoch": 1.028898678414097, "grad_norm": 1.4610582929917253, "learning_rate": 1.0355335329315573e-05, "loss": 0.621055006980896, "step": 2920 }, { "epoch": 1.029251101321586, "grad_norm": 1.7760527372961, "learning_rate": 1.0349512552920114e-05, "loss": 0.6098253726959229, "step": 2921 }, { "epoch": 1.0296035242290749, "grad_norm": 1.8967300437588117, "learning_rate": 1.0343689657876017e-05, "loss": 0.591664731502533, "step": 2922 }, { "epoch": 1.0299559471365638, "grad_norm": 1.616730113059231, "learning_rate": 1.033786664615998e-05, "loss": 0.6531485915184021, "step": 2923 }, { "epoch": 1.030308370044053, "grad_norm": 1.5937698715448299, "learning_rate": 1.0332043519748727e-05, "loss": 0.6933655738830566, "step": 2924 }, { "epoch": 1.030660792951542, "grad_norm": 1.5987643686429562, "learning_rate": 1.0326220280619036e-05, "loss": 0.6512705087661743, "step": 2925 }, { "epoch": 1.0310132158590308, "grad_norm": 1.829250792437923, "learning_rate": 1.0320396930747712e-05, "loss": 0.5671502947807312, "step": 2926 }, { "epoch": 1.0313656387665198, "grad_norm": 1.6239123058071627, "learning_rate": 1.0314573472111601e-05, "loss": 0.6795192360877991, "step": 2927 }, { "epoch": 1.0317180616740087, "grad_norm": 1.5985127083182307, "learning_rate": 1.0308749906687585e-05, "loss": 0.6357578039169312, "step": 2928 }, { "epoch": 1.0320704845814979, "grad_norm": 1.6982196546251649, "learning_rate": 1.0302926236452588e-05, "loss": 0.7009944915771484, "step": 2929 }, { "epoch": 1.0324229074889868, "grad_norm": 1.4806960711115318, "learning_rate": 1.0297102463383557e-05, "loss": 0.4685679078102112, "step": 2930 }, { "epoch": 1.0327753303964757, "grad_norm": 1.5429925693746163, "learning_rate": 1.0291278589457488e-05, "loss": 0.6359078884124756, "step": 2931 }, { "epoch": 1.0331277533039647, "grad_norm": 1.8631741910761805, "learning_rate": 1.0285454616651398e-05, "loss": 0.6606266498565674, "step": 2932 }, { "epoch": 1.0334801762114538, "grad_norm": 1.7076039728900445, "learning_rate": 1.0279630546942353e-05, "loss": 0.5405932664871216, "step": 2933 }, { "epoch": 1.0338325991189428, "grad_norm": 1.4934491606364382, "learning_rate": 1.0273806382307443e-05, "loss": 0.8072758316993713, "step": 2934 }, { "epoch": 1.0341850220264317, "grad_norm": 1.5899951805886359, "learning_rate": 1.0267982124723783e-05, "loss": 0.6923058032989502, "step": 2935 }, { "epoch": 1.0345374449339206, "grad_norm": 1.7156977270346485, "learning_rate": 1.0262157776168533e-05, "loss": 0.5577275156974792, "step": 2936 }, { "epoch": 1.0348898678414098, "grad_norm": 1.6363417924911698, "learning_rate": 1.0256333338618875e-05, "loss": 0.6780786514282227, "step": 2937 }, { "epoch": 1.0352422907488987, "grad_norm": 1.6093019454005904, "learning_rate": 1.0250508814052029e-05, "loss": 0.6966040134429932, "step": 2938 }, { "epoch": 1.0355947136563877, "grad_norm": 1.4912092272159942, "learning_rate": 1.0244684204445237e-05, "loss": 0.5726339817047119, "step": 2939 }, { "epoch": 1.0359471365638766, "grad_norm": 1.372791278777169, "learning_rate": 1.0238859511775768e-05, "loss": 0.64924156665802, "step": 2940 }, { "epoch": 1.0362995594713655, "grad_norm": 1.5498611273448277, "learning_rate": 1.0233034738020933e-05, "loss": 0.49121707677841187, "step": 2941 }, { "epoch": 1.0366519823788547, "grad_norm": 1.4698297870867278, "learning_rate": 1.0227209885158053e-05, "loss": 0.5505814552307129, "step": 2942 }, { "epoch": 1.0370044052863436, "grad_norm": 1.658171020881214, "learning_rate": 1.022138495516449e-05, "loss": 0.7429872751235962, "step": 2943 }, { "epoch": 1.0373568281938326, "grad_norm": 1.5946562373848934, "learning_rate": 1.0215559950017624e-05, "loss": 0.6492434740066528, "step": 2944 }, { "epoch": 1.0377092511013215, "grad_norm": 1.5139165780476451, "learning_rate": 1.0209734871694865e-05, "loss": 0.5418736338615417, "step": 2945 }, { "epoch": 1.0380616740088107, "grad_norm": 1.676058492453494, "learning_rate": 1.0203909722173644e-05, "loss": 0.6252620220184326, "step": 2946 }, { "epoch": 1.0384140969162996, "grad_norm": 1.4699238771485563, "learning_rate": 1.0198084503431416e-05, "loss": 0.5124455690383911, "step": 2947 }, { "epoch": 1.0387665198237885, "grad_norm": 1.4358343290990208, "learning_rate": 1.0192259217445663e-05, "loss": 0.5729688405990601, "step": 2948 }, { "epoch": 1.0391189427312775, "grad_norm": 1.8222711908460536, "learning_rate": 1.0186433866193893e-05, "loss": 0.5891536474227905, "step": 2949 }, { "epoch": 1.0394713656387666, "grad_norm": 1.7110443983801997, "learning_rate": 1.0180608451653626e-05, "loss": 0.774397075176239, "step": 2950 }, { "epoch": 1.0398237885462556, "grad_norm": 1.4480826912481708, "learning_rate": 1.0174782975802408e-05, "loss": 0.5987098813056946, "step": 2951 }, { "epoch": 1.0401762114537445, "grad_norm": 1.634577600554869, "learning_rate": 1.016895744061781e-05, "loss": 0.5334598422050476, "step": 2952 }, { "epoch": 1.0405286343612334, "grad_norm": 1.7236175912347957, "learning_rate": 1.0163131848077421e-05, "loss": 0.5946340560913086, "step": 2953 }, { "epoch": 1.0408810572687224, "grad_norm": 1.601606630295311, "learning_rate": 1.0157306200158847e-05, "loss": 0.5780941247940063, "step": 2954 }, { "epoch": 1.0412334801762115, "grad_norm": 1.6785528445522104, "learning_rate": 1.0151480498839712e-05, "loss": 0.6348963975906372, "step": 2955 }, { "epoch": 1.0415859030837005, "grad_norm": 1.717999985242494, "learning_rate": 1.014565474609766e-05, "loss": 0.6868102550506592, "step": 2956 }, { "epoch": 1.0419383259911894, "grad_norm": 1.6612318546166622, "learning_rate": 1.0139828943910358e-05, "loss": 0.6507548689842224, "step": 2957 }, { "epoch": 1.0422907488986783, "grad_norm": 1.7617270521903845, "learning_rate": 1.0134003094255478e-05, "loss": 0.6358312964439392, "step": 2958 }, { "epoch": 1.0426431718061675, "grad_norm": 1.5725895362844704, "learning_rate": 1.0128177199110723e-05, "loss": 0.7530224919319153, "step": 2959 }, { "epoch": 1.0429955947136564, "grad_norm": 1.5496338862557548, "learning_rate": 1.012235126045379e-05, "loss": 0.545819878578186, "step": 2960 }, { "epoch": 1.0433480176211454, "grad_norm": 1.5828250584633938, "learning_rate": 1.011652528026242e-05, "loss": 0.6626788377761841, "step": 2961 }, { "epoch": 1.0437004405286343, "grad_norm": 1.6913571400986156, "learning_rate": 1.0110699260514336e-05, "loss": 0.6407896280288696, "step": 2962 }, { "epoch": 1.0440528634361232, "grad_norm": 1.4558906354554821, "learning_rate": 1.0104873203187307e-05, "loss": 0.5633673667907715, "step": 2963 }, { "epoch": 1.0444052863436124, "grad_norm": 1.6991226564822444, "learning_rate": 1.0099047110259081e-05, "loss": 0.5356892943382263, "step": 2964 }, { "epoch": 1.0447577092511013, "grad_norm": 1.6571256461175092, "learning_rate": 1.0093220983707448e-05, "loss": 0.5527205467224121, "step": 2965 }, { "epoch": 1.0451101321585903, "grad_norm": 1.5928434384321621, "learning_rate": 1.008739482551019e-05, "loss": 0.6148320436477661, "step": 2966 }, { "epoch": 1.0454625550660792, "grad_norm": 1.8604930696261837, "learning_rate": 1.0081568637645111e-05, "loss": 0.5713976621627808, "step": 2967 }, { "epoch": 1.0458149779735684, "grad_norm": 1.4811105317563769, "learning_rate": 1.0075742422090015e-05, "loss": 0.5836226940155029, "step": 2968 }, { "epoch": 1.0461674008810573, "grad_norm": 1.829134506733255, "learning_rate": 1.0069916180822727e-05, "loss": 0.6452749371528625, "step": 2969 }, { "epoch": 1.0465198237885462, "grad_norm": 1.507975881410604, "learning_rate": 1.006408991582107e-05, "loss": 0.5468501448631287, "step": 2970 }, { "epoch": 1.0468722466960352, "grad_norm": 1.6217984014708016, "learning_rate": 1.0058263629062883e-05, "loss": 0.5195704698562622, "step": 2971 }, { "epoch": 1.0472246696035241, "grad_norm": 1.603914403857505, "learning_rate": 1.0052437322526003e-05, "loss": 0.5144641995429993, "step": 2972 }, { "epoch": 1.0475770925110133, "grad_norm": 1.767647834896278, "learning_rate": 1.004661099818829e-05, "loss": 0.7258927822113037, "step": 2973 }, { "epoch": 1.0479295154185022, "grad_norm": 1.8920163745404244, "learning_rate": 1.004078465802759e-05, "loss": 0.6108053922653198, "step": 2974 }, { "epoch": 1.0482819383259911, "grad_norm": 1.5703096539855212, "learning_rate": 1.0034958304021766e-05, "loss": 0.612535834312439, "step": 2975 }, { "epoch": 1.04863436123348, "grad_norm": 1.6902304674604145, "learning_rate": 1.0029131938148686e-05, "loss": 0.7272380590438843, "step": 2976 }, { "epoch": 1.0489867841409692, "grad_norm": 1.4306480582223446, "learning_rate": 1.0023305562386222e-05, "loss": 0.4748264253139496, "step": 2977 }, { "epoch": 1.0493392070484582, "grad_norm": 1.7625234188194432, "learning_rate": 1.0017479178712245e-05, "loss": 0.6686758399009705, "step": 2978 }, { "epoch": 1.0496916299559471, "grad_norm": 1.6796969203533192, "learning_rate": 1.0011652789104631e-05, "loss": 0.5003838539123535, "step": 2979 }, { "epoch": 1.050044052863436, "grad_norm": 1.7305572983583226, "learning_rate": 1.0005826395541257e-05, "loss": 0.6210055351257324, "step": 2980 }, { "epoch": 1.0503964757709252, "grad_norm": 1.6943397299052507, "learning_rate": 1e-05, "loss": 0.6160269975662231, "step": 2981 }, { "epoch": 1.0507488986784141, "grad_norm": 1.6249468093767248, "learning_rate": 9.994173604458748e-06, "loss": 0.6432052850723267, "step": 2982 }, { "epoch": 1.051101321585903, "grad_norm": 1.6764234439374022, "learning_rate": 9.988347210895372e-06, "loss": 0.588628888130188, "step": 2983 }, { "epoch": 1.051453744493392, "grad_norm": 1.5595740377523009, "learning_rate": 9.982520821287758e-06, "loss": 0.6694320440292358, "step": 2984 }, { "epoch": 1.051806167400881, "grad_norm": 1.7276474901524372, "learning_rate": 9.976694437613778e-06, "loss": 0.8591301441192627, "step": 2985 }, { "epoch": 1.0521585903083701, "grad_norm": 1.6697380234108412, "learning_rate": 9.970868061851315e-06, "loss": 0.6000436544418335, "step": 2986 }, { "epoch": 1.052511013215859, "grad_norm": 1.5357275356358564, "learning_rate": 9.965041695978239e-06, "loss": 0.624568521976471, "step": 2987 }, { "epoch": 1.052863436123348, "grad_norm": 1.4223866897031825, "learning_rate": 9.959215341972414e-06, "loss": 0.6173535585403442, "step": 2988 }, { "epoch": 1.053215859030837, "grad_norm": 1.7069399452687213, "learning_rate": 9.953389001811716e-06, "loss": 0.5991729497909546, "step": 2989 }, { "epoch": 1.053568281938326, "grad_norm": 1.782972390393551, "learning_rate": 9.947562677473999e-06, "loss": 0.570953905582428, "step": 2990 }, { "epoch": 1.053920704845815, "grad_norm": 1.7332305108715658, "learning_rate": 9.941736370937119e-06, "loss": 0.6079390048980713, "step": 2991 }, { "epoch": 1.054273127753304, "grad_norm": 2.110617001097567, "learning_rate": 9.935910084178934e-06, "loss": 0.599539577960968, "step": 2992 }, { "epoch": 1.0546255506607929, "grad_norm": 1.5854202353385896, "learning_rate": 9.930083819177273e-06, "loss": 0.6736180186271667, "step": 2993 }, { "epoch": 1.054977973568282, "grad_norm": 1.6240153775210555, "learning_rate": 9.924257577909987e-06, "loss": 0.6953197717666626, "step": 2994 }, { "epoch": 1.055330396475771, "grad_norm": 1.8737137053755175, "learning_rate": 9.918431362354892e-06, "loss": 0.6670099496841431, "step": 2995 }, { "epoch": 1.05568281938326, "grad_norm": 1.844007753613641, "learning_rate": 9.912605174489811e-06, "loss": 0.5829994678497314, "step": 2996 }, { "epoch": 1.0560352422907489, "grad_norm": 1.9198236703913207, "learning_rate": 9.906779016292554e-06, "loss": 0.5926212072372437, "step": 2997 }, { "epoch": 1.0563876651982378, "grad_norm": 1.4868752944824364, "learning_rate": 9.900952889740922e-06, "loss": 0.6085237860679626, "step": 2998 }, { "epoch": 1.056740088105727, "grad_norm": 1.8046049827658854, "learning_rate": 9.895126796812698e-06, "loss": 0.5348918437957764, "step": 2999 }, { "epoch": 1.0570925110132159, "grad_norm": 1.79509807280399, "learning_rate": 9.889300739485666e-06, "loss": 0.6325811743736267, "step": 3000 }, { "epoch": 1.0574449339207048, "grad_norm": 1.6006099839795653, "learning_rate": 9.883474719737582e-06, "loss": 0.6262463927268982, "step": 3001 }, { "epoch": 1.0577973568281938, "grad_norm": 1.5914788157951554, "learning_rate": 9.877648739546213e-06, "loss": 0.5863393545150757, "step": 3002 }, { "epoch": 1.058149779735683, "grad_norm": 2.0254476885032924, "learning_rate": 9.871822800889284e-06, "loss": 0.6200219392776489, "step": 3003 }, { "epoch": 1.0585022026431719, "grad_norm": 1.6216300774961065, "learning_rate": 9.865996905744523e-06, "loss": 0.6994227170944214, "step": 3004 }, { "epoch": 1.0588546255506608, "grad_norm": 1.735404014120002, "learning_rate": 9.860171056089646e-06, "loss": 0.6458406448364258, "step": 3005 }, { "epoch": 1.0592070484581497, "grad_norm": 1.6209915560634427, "learning_rate": 9.854345253902342e-06, "loss": 0.6814782619476318, "step": 3006 }, { "epoch": 1.0595594713656387, "grad_norm": 1.455508358080935, "learning_rate": 9.84851950116029e-06, "loss": 0.521275520324707, "step": 3007 }, { "epoch": 1.0599118942731278, "grad_norm": 1.486020788258086, "learning_rate": 9.84269379984116e-06, "loss": 0.5541207790374756, "step": 3008 }, { "epoch": 1.0602643171806168, "grad_norm": 1.7060435970959642, "learning_rate": 9.836868151922579e-06, "loss": 0.578704833984375, "step": 3009 }, { "epoch": 1.0606167400881057, "grad_norm": 1.5220368339292814, "learning_rate": 9.831042559382193e-06, "loss": 0.6280980706214905, "step": 3010 }, { "epoch": 1.0609691629955946, "grad_norm": 1.8314917502019485, "learning_rate": 9.825217024197595e-06, "loss": 0.6059408783912659, "step": 3011 }, { "epoch": 1.0613215859030838, "grad_norm": 1.6362891327789773, "learning_rate": 9.819391548346377e-06, "loss": 0.6375449299812317, "step": 3012 }, { "epoch": 1.0616740088105727, "grad_norm": 2.503364134053993, "learning_rate": 9.81356613380611e-06, "loss": 0.5959592461585999, "step": 3013 }, { "epoch": 1.0620264317180617, "grad_norm": 1.735073300438408, "learning_rate": 9.807740782554337e-06, "loss": 0.7636409401893616, "step": 3014 }, { "epoch": 1.0623788546255506, "grad_norm": 2.2227407713805722, "learning_rate": 9.801915496568586e-06, "loss": 0.6136656999588013, "step": 3015 }, { "epoch": 1.0627312775330395, "grad_norm": 1.7360474444382674, "learning_rate": 9.796090277826361e-06, "loss": 0.4659839868545532, "step": 3016 }, { "epoch": 1.0630837004405287, "grad_norm": 1.699131973967987, "learning_rate": 9.790265128305137e-06, "loss": 0.6053155660629272, "step": 3017 }, { "epoch": 1.0634361233480176, "grad_norm": 1.698457126583602, "learning_rate": 9.78444004998238e-06, "loss": 0.6885203123092651, "step": 3018 }, { "epoch": 1.0637885462555066, "grad_norm": 1.5620062631250171, "learning_rate": 9.778615044835513e-06, "loss": 0.4985584616661072, "step": 3019 }, { "epoch": 1.0641409691629955, "grad_norm": 1.699890122838272, "learning_rate": 9.772790114841948e-06, "loss": 0.5782307386398315, "step": 3020 }, { "epoch": 1.0644933920704847, "grad_norm": 1.7427928970766464, "learning_rate": 9.766965261979072e-06, "loss": 0.5819451212882996, "step": 3021 }, { "epoch": 1.0648458149779736, "grad_norm": 1.9531302264016444, "learning_rate": 9.761140488224232e-06, "loss": 0.7316779494285583, "step": 3022 }, { "epoch": 1.0651982378854625, "grad_norm": 2.4211241065200633, "learning_rate": 9.755315795554766e-06, "loss": 0.5986718535423279, "step": 3023 }, { "epoch": 1.0655506607929515, "grad_norm": 1.5565361520380023, "learning_rate": 9.749491185947977e-06, "loss": 0.5052427053451538, "step": 3024 }, { "epoch": 1.0659030837004406, "grad_norm": 1.658020296029534, "learning_rate": 9.743666661381123e-06, "loss": 0.7370901107788086, "step": 3025 }, { "epoch": 1.0662555066079296, "grad_norm": 1.575987435195716, "learning_rate": 9.73784222383147e-06, "loss": 0.6423007249832153, "step": 3026 }, { "epoch": 1.0666079295154185, "grad_norm": 1.94896820476588, "learning_rate": 9.73201787527622e-06, "loss": 0.5679126977920532, "step": 3027 }, { "epoch": 1.0669603524229074, "grad_norm": 2.498602043471406, "learning_rate": 9.72619361769256e-06, "loss": 0.5890183448791504, "step": 3028 }, { "epoch": 1.0673127753303966, "grad_norm": 1.7647674693242208, "learning_rate": 9.720369453057648e-06, "loss": 0.6772822141647339, "step": 3029 }, { "epoch": 1.0676651982378855, "grad_norm": 2.109810086892336, "learning_rate": 9.714545383348602e-06, "loss": 0.8275488615036011, "step": 3030 }, { "epoch": 1.0680176211453745, "grad_norm": 1.6620933678667917, "learning_rate": 9.708721410542517e-06, "loss": 0.5369541645050049, "step": 3031 }, { "epoch": 1.0683700440528634, "grad_norm": 1.611800532750273, "learning_rate": 9.70289753661645e-06, "loss": 0.7173746824264526, "step": 3032 }, { "epoch": 1.0687224669603523, "grad_norm": 1.7405771304623092, "learning_rate": 9.697073763547415e-06, "loss": 0.597034215927124, "step": 3033 }, { "epoch": 1.0690748898678415, "grad_norm": 1.867958529307263, "learning_rate": 9.691250093312419e-06, "loss": 0.6680281162261963, "step": 3034 }, { "epoch": 1.0694273127753304, "grad_norm": 1.4898600082698874, "learning_rate": 9.6854265278884e-06, "loss": 0.6155321002006531, "step": 3035 }, { "epoch": 1.0697797356828194, "grad_norm": 2.4613840016445314, "learning_rate": 9.67960306925229e-06, "loss": 0.5945199728012085, "step": 3036 }, { "epoch": 1.0701321585903083, "grad_norm": 1.7063166475670735, "learning_rate": 9.673779719380967e-06, "loss": 0.6492328643798828, "step": 3037 }, { "epoch": 1.0704845814977975, "grad_norm": 1.8638826733925389, "learning_rate": 9.667956480251273e-06, "loss": 0.6501325964927673, "step": 3038 }, { "epoch": 1.0708370044052864, "grad_norm": 1.4216071761527918, "learning_rate": 9.662133353840025e-06, "loss": 0.5956053733825684, "step": 3039 }, { "epoch": 1.0711894273127753, "grad_norm": 1.7546711372901296, "learning_rate": 9.656310342123988e-06, "loss": 0.5966510772705078, "step": 3040 }, { "epoch": 1.0715418502202643, "grad_norm": 1.7715803220306194, "learning_rate": 9.65048744707989e-06, "loss": 0.7096615433692932, "step": 3041 }, { "epoch": 1.0718942731277532, "grad_norm": 1.5279732385894715, "learning_rate": 9.644664670684429e-06, "loss": 0.6697839498519897, "step": 3042 }, { "epoch": 1.0722466960352424, "grad_norm": 1.6318262899161158, "learning_rate": 9.638842014914253e-06, "loss": 0.6288081407546997, "step": 3043 }, { "epoch": 1.0725991189427313, "grad_norm": 1.6830476156095877, "learning_rate": 9.633019481745973e-06, "loss": 0.5870436429977417, "step": 3044 }, { "epoch": 1.0729515418502202, "grad_norm": 1.4073037692368846, "learning_rate": 9.62719707315616e-06, "loss": 0.5540846586227417, "step": 3045 }, { "epoch": 1.0733039647577092, "grad_norm": 1.8276869267624827, "learning_rate": 9.621374791121335e-06, "loss": 0.6134544014930725, "step": 3046 }, { "epoch": 1.0736563876651983, "grad_norm": 1.9310361455307938, "learning_rate": 9.61555263761799e-06, "loss": 0.5537046194076538, "step": 3047 }, { "epoch": 1.0740088105726873, "grad_norm": 1.5553451953770387, "learning_rate": 9.60973061462256e-06, "loss": 0.6423748731613159, "step": 3048 }, { "epoch": 1.0743612334801762, "grad_norm": 1.7219317421679232, "learning_rate": 9.603908724111438e-06, "loss": 0.575737714767456, "step": 3049 }, { "epoch": 1.0747136563876651, "grad_norm": 1.7334347992355148, "learning_rate": 9.598086968060976e-06, "loss": 0.5326197147369385, "step": 3050 }, { "epoch": 1.075066079295154, "grad_norm": 1.5560472770838902, "learning_rate": 9.592265348447481e-06, "loss": 0.6533973217010498, "step": 3051 }, { "epoch": 1.0754185022026432, "grad_norm": 1.5101678591543142, "learning_rate": 9.586443867247212e-06, "loss": 0.5536586046218872, "step": 3052 }, { "epoch": 1.0757709251101322, "grad_norm": 1.6611779528904365, "learning_rate": 9.580622526436382e-06, "loss": 0.6024892926216125, "step": 3053 }, { "epoch": 1.076123348017621, "grad_norm": 1.5423440836231639, "learning_rate": 9.574801327991148e-06, "loss": 0.5070478320121765, "step": 3054 }, { "epoch": 1.07647577092511, "grad_norm": 1.9040251147858696, "learning_rate": 9.568980273887637e-06, "loss": 0.6518458127975464, "step": 3055 }, { "epoch": 1.0768281938325992, "grad_norm": 1.8761852451910037, "learning_rate": 9.563159366101905e-06, "loss": 0.6120346784591675, "step": 3056 }, { "epoch": 1.0771806167400881, "grad_norm": 1.7428937123650154, "learning_rate": 9.557338606609973e-06, "loss": 0.6725353598594666, "step": 3057 }, { "epoch": 1.077533039647577, "grad_norm": 1.5136863007311347, "learning_rate": 9.551517997387809e-06, "loss": 0.5311183333396912, "step": 3058 }, { "epoch": 1.077885462555066, "grad_norm": 1.8000300040025692, "learning_rate": 9.545697540411324e-06, "loss": 0.5728713274002075, "step": 3059 }, { "epoch": 1.0782378854625552, "grad_norm": 1.7991281029512354, "learning_rate": 9.53987723765639e-06, "loss": 0.5527676343917847, "step": 3060 }, { "epoch": 1.078590308370044, "grad_norm": 1.9177712397501578, "learning_rate": 9.534057091098813e-06, "loss": 0.7529809474945068, "step": 3061 }, { "epoch": 1.078942731277533, "grad_norm": 1.6975104946869117, "learning_rate": 9.528237102714352e-06, "loss": 0.5485205054283142, "step": 3062 }, { "epoch": 1.079295154185022, "grad_norm": 1.8773141561341242, "learning_rate": 9.522417274478716e-06, "loss": 0.785184383392334, "step": 3063 }, { "epoch": 1.079647577092511, "grad_norm": 1.692195190429073, "learning_rate": 9.516597608367547e-06, "loss": 0.5645574331283569, "step": 3064 }, { "epoch": 1.08, "grad_norm": 1.6815198266991151, "learning_rate": 9.51077810635645e-06, "loss": 0.5878466367721558, "step": 3065 }, { "epoch": 1.080352422907489, "grad_norm": 1.7635464385467587, "learning_rate": 9.504958770420962e-06, "loss": 0.6610634922981262, "step": 3066 }, { "epoch": 1.080704845814978, "grad_norm": 1.8113852263213976, "learning_rate": 9.49913960253656e-06, "loss": 0.5928626656532288, "step": 3067 }, { "epoch": 1.0810572687224669, "grad_norm": 1.7322633216843277, "learning_rate": 9.49332060467868e-06, "loss": 0.7038083672523499, "step": 3068 }, { "epoch": 1.081409691629956, "grad_norm": 1.3686406289588096, "learning_rate": 9.487501778822685e-06, "loss": 0.5966217517852783, "step": 3069 }, { "epoch": 1.081762114537445, "grad_norm": 1.686172060324731, "learning_rate": 9.481683126943884e-06, "loss": 0.6594187021255493, "step": 3070 }, { "epoch": 1.082114537444934, "grad_norm": 1.4709153501511232, "learning_rate": 9.475864651017536e-06, "loss": 0.450161874294281, "step": 3071 }, { "epoch": 1.0824669603524228, "grad_norm": 1.9209170149530705, "learning_rate": 9.470046353018821e-06, "loss": 0.6459252834320068, "step": 3072 }, { "epoch": 1.082819383259912, "grad_norm": 1.5818284678879686, "learning_rate": 9.464228234922882e-06, "loss": 0.6505793929100037, "step": 3073 }, { "epoch": 1.083171806167401, "grad_norm": 1.5944722571395005, "learning_rate": 9.458410298704781e-06, "loss": 0.6480910778045654, "step": 3074 }, { "epoch": 1.0835242290748899, "grad_norm": 1.530550500951046, "learning_rate": 9.452592546339527e-06, "loss": 0.6494983434677124, "step": 3075 }, { "epoch": 1.0838766519823788, "grad_norm": 1.560525752678919, "learning_rate": 9.44677497980207e-06, "loss": 0.4710897207260132, "step": 3076 }, { "epoch": 1.0842290748898677, "grad_norm": 1.5265540562186208, "learning_rate": 9.440957601067294e-06, "loss": 0.599402904510498, "step": 3077 }, { "epoch": 1.084581497797357, "grad_norm": 1.9340764168188993, "learning_rate": 9.435140412110006e-06, "loss": 0.665642499923706, "step": 3078 }, { "epoch": 1.0849339207048458, "grad_norm": 1.8868033009058576, "learning_rate": 9.429323414904975e-06, "loss": 0.5861828923225403, "step": 3079 }, { "epoch": 1.0852863436123348, "grad_norm": 1.581789489047221, "learning_rate": 9.42350661142688e-06, "loss": 0.6115351915359497, "step": 3080 }, { "epoch": 1.0856387665198237, "grad_norm": 1.6610293276945491, "learning_rate": 9.417690003650353e-06, "loss": 0.6627066135406494, "step": 3081 }, { "epoch": 1.0859911894273129, "grad_norm": 1.5744692750190625, "learning_rate": 9.411873593549947e-06, "loss": 0.6155676245689392, "step": 3082 }, { "epoch": 1.0863436123348018, "grad_norm": 1.59429166731528, "learning_rate": 9.406057383100151e-06, "loss": 0.5429089069366455, "step": 3083 }, { "epoch": 1.0866960352422907, "grad_norm": 1.638763712553269, "learning_rate": 9.400241374275391e-06, "loss": 0.5416614413261414, "step": 3084 }, { "epoch": 1.0870484581497797, "grad_norm": 1.5652840639245515, "learning_rate": 9.394425569050018e-06, "loss": 0.6708710193634033, "step": 3085 }, { "epoch": 1.0874008810572686, "grad_norm": 1.6407899201706977, "learning_rate": 9.388609969398318e-06, "loss": 0.588347315788269, "step": 3086 }, { "epoch": 1.0877533039647578, "grad_norm": 1.6990356352816562, "learning_rate": 9.38279457729451e-06, "loss": 0.4999222755432129, "step": 3087 }, { "epoch": 1.0881057268722467, "grad_norm": 1.5508462782114225, "learning_rate": 9.37697939471273e-06, "loss": 0.5400034189224243, "step": 3088 }, { "epoch": 1.0884581497797357, "grad_norm": 1.6869985582255194, "learning_rate": 9.37116442362706e-06, "loss": 0.5583670139312744, "step": 3089 }, { "epoch": 1.0888105726872246, "grad_norm": 2.063349590123988, "learning_rate": 9.365349666011497e-06, "loss": 0.6863820552825928, "step": 3090 }, { "epoch": 1.0891629955947137, "grad_norm": 1.7395123823701124, "learning_rate": 9.35953512383997e-06, "loss": 0.6422115564346313, "step": 3091 }, { "epoch": 1.0895154185022027, "grad_norm": 1.7254266288951046, "learning_rate": 9.353720799086337e-06, "loss": 0.7106888294219971, "step": 3092 }, { "epoch": 1.0898678414096916, "grad_norm": 1.7765997338600088, "learning_rate": 9.347906693724379e-06, "loss": 0.6070472002029419, "step": 3093 }, { "epoch": 1.0902202643171806, "grad_norm": 2.653468303504809, "learning_rate": 9.342092809727807e-06, "loss": 0.577377200126648, "step": 3094 }, { "epoch": 1.0905726872246695, "grad_norm": 2.222722693331331, "learning_rate": 9.336279149070252e-06, "loss": 0.6249948740005493, "step": 3095 }, { "epoch": 1.0909251101321586, "grad_norm": 1.7155188858933852, "learning_rate": 9.330465713725265e-06, "loss": 0.5515183210372925, "step": 3096 }, { "epoch": 1.0912775330396476, "grad_norm": 1.866411497064146, "learning_rate": 9.324652505666336e-06, "loss": 0.6074613332748413, "step": 3097 }, { "epoch": 1.0916299559471365, "grad_norm": 1.7632595046666684, "learning_rate": 9.318839526866863e-06, "loss": 0.6520178318023682, "step": 3098 }, { "epoch": 1.0919823788546255, "grad_norm": 1.4274715968201055, "learning_rate": 9.31302677930017e-06, "loss": 0.45863813161849976, "step": 3099 }, { "epoch": 1.0923348017621146, "grad_norm": 1.6772052003130429, "learning_rate": 9.307214264939508e-06, "loss": 0.610805869102478, "step": 3100 }, { "epoch": 1.0926872246696036, "grad_norm": 1.6545163632346178, "learning_rate": 9.30140198575804e-06, "loss": 0.5954282283782959, "step": 3101 }, { "epoch": 1.0930396475770925, "grad_norm": 1.4805927694864789, "learning_rate": 9.29558994372886e-06, "loss": 0.6941400170326233, "step": 3102 }, { "epoch": 1.0933920704845814, "grad_norm": 1.4236727289117346, "learning_rate": 9.289778140824974e-06, "loss": 0.6723533868789673, "step": 3103 }, { "epoch": 1.0937444933920706, "grad_norm": 1.5690147341016918, "learning_rate": 9.2839665790193e-06, "loss": 0.49137037992477417, "step": 3104 }, { "epoch": 1.0940969162995595, "grad_norm": 1.6112616837583658, "learning_rate": 9.278155260284692e-06, "loss": 0.5827045440673828, "step": 3105 }, { "epoch": 1.0944493392070485, "grad_norm": 1.7496187485651187, "learning_rate": 9.272344186593909e-06, "loss": 0.6391462683677673, "step": 3106 }, { "epoch": 1.0948017621145374, "grad_norm": 1.857839078789808, "learning_rate": 9.266533359919628e-06, "loss": 0.4994915723800659, "step": 3107 }, { "epoch": 1.0951541850220265, "grad_norm": 1.7820549618718244, "learning_rate": 9.260722782234445e-06, "loss": 0.6480728387832642, "step": 3108 }, { "epoch": 1.0955066079295155, "grad_norm": 1.9724258404436363, "learning_rate": 9.25491245551087e-06, "loss": 0.5734057426452637, "step": 3109 }, { "epoch": 1.0958590308370044, "grad_norm": 1.5757198230236702, "learning_rate": 9.249102381721328e-06, "loss": 0.5650345087051392, "step": 3110 }, { "epoch": 1.0962114537444934, "grad_norm": 1.6196253415823336, "learning_rate": 9.243292562838164e-06, "loss": 0.6261975765228271, "step": 3111 }, { "epoch": 1.0965638766519823, "grad_norm": 1.6283298345999566, "learning_rate": 9.237483000833619e-06, "loss": 0.730735182762146, "step": 3112 }, { "epoch": 1.0969162995594715, "grad_norm": 1.614573149399901, "learning_rate": 9.231673697679867e-06, "loss": 0.6198948621749878, "step": 3113 }, { "epoch": 1.0972687224669604, "grad_norm": 1.526191646446162, "learning_rate": 9.225864655348982e-06, "loss": 0.5302865505218506, "step": 3114 }, { "epoch": 1.0976211453744493, "grad_norm": 1.6895671377093768, "learning_rate": 9.220055875812955e-06, "loss": 0.5995128154754639, "step": 3115 }, { "epoch": 1.0979735682819383, "grad_norm": 1.5451580100020488, "learning_rate": 9.214247361043687e-06, "loss": 0.3801479935646057, "step": 3116 }, { "epoch": 1.0983259911894274, "grad_norm": 1.7467243659333909, "learning_rate": 9.208439113012984e-06, "loss": 0.5617209076881409, "step": 3117 }, { "epoch": 1.0986784140969164, "grad_norm": 2.3313501330545776, "learning_rate": 9.202631133692572e-06, "loss": 0.5233842134475708, "step": 3118 }, { "epoch": 1.0990308370044053, "grad_norm": 1.5308784453968334, "learning_rate": 9.196823425054073e-06, "loss": 0.5300124883651733, "step": 3119 }, { "epoch": 1.0993832599118942, "grad_norm": 1.6766914696070794, "learning_rate": 9.191015989069024e-06, "loss": 0.686185359954834, "step": 3120 }, { "epoch": 1.0997356828193832, "grad_norm": 4.625699614895419, "learning_rate": 9.18520882770887e-06, "loss": 0.6043056845664978, "step": 3121 }, { "epoch": 1.1000881057268723, "grad_norm": 1.4445640616396158, "learning_rate": 9.179401942944961e-06, "loss": 0.6299905776977539, "step": 3122 }, { "epoch": 1.1004405286343613, "grad_norm": 1.639683344548818, "learning_rate": 9.173595336748557e-06, "loss": 0.57872474193573, "step": 3123 }, { "epoch": 1.1007929515418502, "grad_norm": 1.6533643796746975, "learning_rate": 9.167789011090818e-06, "loss": 0.5638746023178101, "step": 3124 }, { "epoch": 1.1011453744493391, "grad_norm": 1.9780317067618627, "learning_rate": 9.161982967942806e-06, "loss": 0.6150490045547485, "step": 3125 }, { "epoch": 1.1014977973568283, "grad_norm": 1.6035565827670604, "learning_rate": 9.156177209275503e-06, "loss": 0.547231912612915, "step": 3126 }, { "epoch": 1.1018502202643172, "grad_norm": 1.753224578445511, "learning_rate": 9.150371737059773e-06, "loss": 0.6999325752258301, "step": 3127 }, { "epoch": 1.1022026431718062, "grad_norm": 1.868897492269033, "learning_rate": 9.144566553266396e-06, "loss": 0.7175568342208862, "step": 3128 }, { "epoch": 1.102555066079295, "grad_norm": 1.6615553040601516, "learning_rate": 9.138761659866054e-06, "loss": 0.7308273911476135, "step": 3129 }, { "epoch": 1.102907488986784, "grad_norm": 1.6216416819643327, "learning_rate": 9.132957058829323e-06, "loss": 0.5951930284500122, "step": 3130 }, { "epoch": 1.1032599118942732, "grad_norm": 1.8459198222998503, "learning_rate": 9.127152752126688e-06, "loss": 0.5684988498687744, "step": 3131 }, { "epoch": 1.1036123348017621, "grad_norm": 1.6778026851292638, "learning_rate": 9.121348741728532e-06, "loss": 0.6490764617919922, "step": 3132 }, { "epoch": 1.103964757709251, "grad_norm": 1.9759558630482505, "learning_rate": 9.115545029605129e-06, "loss": 0.7795257568359375, "step": 3133 }, { "epoch": 1.10431718061674, "grad_norm": 1.677150279034534, "learning_rate": 9.10974161772667e-06, "loss": 0.5443774461746216, "step": 3134 }, { "epoch": 1.1046696035242292, "grad_norm": 1.4979331299176493, "learning_rate": 9.103938508063223e-06, "loss": 0.48989373445510864, "step": 3135 }, { "epoch": 1.105022026431718, "grad_norm": 1.7384756252454785, "learning_rate": 9.098135702584762e-06, "loss": 0.5628808736801147, "step": 3136 }, { "epoch": 1.105374449339207, "grad_norm": 1.7853238397751252, "learning_rate": 9.092333203261168e-06, "loss": 0.6549321413040161, "step": 3137 }, { "epoch": 1.105726872246696, "grad_norm": 1.6854667721006384, "learning_rate": 9.0865310120622e-06, "loss": 0.7353606224060059, "step": 3138 }, { "epoch": 1.106079295154185, "grad_norm": 1.4467352618974103, "learning_rate": 9.080729130957528e-06, "loss": 0.650668203830719, "step": 3139 }, { "epoch": 1.106431718061674, "grad_norm": 1.4313841589857448, "learning_rate": 9.07492756191671e-06, "loss": 0.5618860721588135, "step": 3140 }, { "epoch": 1.106784140969163, "grad_norm": 1.6263891772619556, "learning_rate": 9.069126306909187e-06, "loss": 0.5532773733139038, "step": 3141 }, { "epoch": 1.107136563876652, "grad_norm": 1.5761547934103723, "learning_rate": 9.06332536790432e-06, "loss": 0.6240289211273193, "step": 3142 }, { "epoch": 1.1074889867841409, "grad_norm": 1.6326282131144043, "learning_rate": 9.057524746871335e-06, "loss": 0.5952814221382141, "step": 3143 }, { "epoch": 1.10784140969163, "grad_norm": 1.7063742447281478, "learning_rate": 9.051724445779373e-06, "loss": 0.6011646389961243, "step": 3144 }, { "epoch": 1.108193832599119, "grad_norm": 1.54385403751274, "learning_rate": 9.045924466597448e-06, "loss": 0.6964641213417053, "step": 3145 }, { "epoch": 1.108546255506608, "grad_norm": 1.9798851390043897, "learning_rate": 9.040124811294473e-06, "loss": 0.6821622848510742, "step": 3146 }, { "epoch": 1.1088986784140968, "grad_norm": 1.569676973352834, "learning_rate": 9.034325481839253e-06, "loss": 0.5045080184936523, "step": 3147 }, { "epoch": 1.109251101321586, "grad_norm": 1.608921739397865, "learning_rate": 9.028526480200482e-06, "loss": 0.5709735155105591, "step": 3148 }, { "epoch": 1.109603524229075, "grad_norm": 1.6331449251948336, "learning_rate": 9.022727808346731e-06, "loss": 0.5882325172424316, "step": 3149 }, { "epoch": 1.1099559471365639, "grad_norm": 1.6560869042500304, "learning_rate": 9.016929468246482e-06, "loss": 0.627426266670227, "step": 3150 }, { "epoch": 1.1103083700440528, "grad_norm": 1.5720686051365462, "learning_rate": 9.011131461868078e-06, "loss": 0.42419761419296265, "step": 3151 }, { "epoch": 1.110660792951542, "grad_norm": 1.487398401726564, "learning_rate": 9.005333791179775e-06, "loss": 0.5261023044586182, "step": 3152 }, { "epoch": 1.111013215859031, "grad_norm": 1.853640852117203, "learning_rate": 8.999536458149692e-06, "loss": 0.6654448509216309, "step": 3153 }, { "epoch": 1.1113656387665198, "grad_norm": 1.8252144061899127, "learning_rate": 8.993739464745843e-06, "loss": 0.5939514636993408, "step": 3154 }, { "epoch": 1.1117180616740088, "grad_norm": 2.120048901517583, "learning_rate": 8.987942812936133e-06, "loss": 0.6381959319114685, "step": 3155 }, { "epoch": 1.1120704845814977, "grad_norm": 1.5708485505419778, "learning_rate": 8.982146504688343e-06, "loss": 0.5474847555160522, "step": 3156 }, { "epoch": 1.1124229074889869, "grad_norm": 1.9617265332983251, "learning_rate": 8.97635054197013e-06, "loss": 0.6306884288787842, "step": 3157 }, { "epoch": 1.1127753303964758, "grad_norm": 1.6582794196349533, "learning_rate": 8.97055492674906e-06, "loss": 0.5988807678222656, "step": 3158 }, { "epoch": 1.1131277533039647, "grad_norm": 1.4627681911625667, "learning_rate": 8.964759660992547e-06, "loss": 0.6316757202148438, "step": 3159 }, { "epoch": 1.1134801762114537, "grad_norm": 2.1475966254528265, "learning_rate": 8.958964746667917e-06, "loss": 0.6031370162963867, "step": 3160 }, { "epoch": 1.1138325991189428, "grad_norm": 1.631780585948097, "learning_rate": 8.953170185742357e-06, "loss": 0.6334977149963379, "step": 3161 }, { "epoch": 1.1141850220264318, "grad_norm": 1.7666867258825858, "learning_rate": 8.947375980182937e-06, "loss": 0.49237731099128723, "step": 3162 }, { "epoch": 1.1145374449339207, "grad_norm": 1.8113939325794732, "learning_rate": 8.941582131956615e-06, "loss": 0.7349523305892944, "step": 3163 }, { "epoch": 1.1148898678414096, "grad_norm": 1.9764498599764084, "learning_rate": 8.935788643030218e-06, "loss": 0.5048422813415527, "step": 3164 }, { "epoch": 1.1152422907488986, "grad_norm": 1.90381850621639, "learning_rate": 8.92999551537046e-06, "loss": 0.6217244267463684, "step": 3165 }, { "epoch": 1.1155947136563877, "grad_norm": 1.6579628905821213, "learning_rate": 8.924202750943926e-06, "loss": 0.4949147701263428, "step": 3166 }, { "epoch": 1.1159471365638767, "grad_norm": 1.8665150826118222, "learning_rate": 8.918410351717074e-06, "loss": 0.5975630283355713, "step": 3167 }, { "epoch": 1.1162995594713656, "grad_norm": 1.8627553919144322, "learning_rate": 8.91261831965625e-06, "loss": 0.7546026110649109, "step": 3168 }, { "epoch": 1.1166519823788545, "grad_norm": 1.8785066059323416, "learning_rate": 8.906826656727665e-06, "loss": 0.6238037347793579, "step": 3169 }, { "epoch": 1.1170044052863437, "grad_norm": 1.7775910427875068, "learning_rate": 8.901035364897407e-06, "loss": 0.617587685585022, "step": 3170 }, { "epoch": 1.1173568281938326, "grad_norm": 1.6345696523196545, "learning_rate": 8.895244446131445e-06, "loss": 0.4834432005882263, "step": 3171 }, { "epoch": 1.1177092511013216, "grad_norm": 1.8061061322305951, "learning_rate": 8.889453902395608e-06, "loss": 0.614972710609436, "step": 3172 }, { "epoch": 1.1180616740088105, "grad_norm": 2.26536947887869, "learning_rate": 8.883663735655612e-06, "loss": 0.6468379497528076, "step": 3173 }, { "epoch": 1.1184140969162994, "grad_norm": 1.8154030785363677, "learning_rate": 8.877873947877042e-06, "loss": 0.6372466683387756, "step": 3174 }, { "epoch": 1.1187665198237886, "grad_norm": 1.8831907584481906, "learning_rate": 8.872084541025336e-06, "loss": 0.6295863389968872, "step": 3175 }, { "epoch": 1.1191189427312775, "grad_norm": 1.7211075291863254, "learning_rate": 8.866295517065831e-06, "loss": 0.6109524369239807, "step": 3176 }, { "epoch": 1.1194713656387665, "grad_norm": 1.6861537948886334, "learning_rate": 8.860506877963715e-06, "loss": 0.6724812388420105, "step": 3177 }, { "epoch": 1.1198237885462554, "grad_norm": 1.4091706259139964, "learning_rate": 8.854718625684049e-06, "loss": 0.6612162590026855, "step": 3178 }, { "epoch": 1.1201762114537446, "grad_norm": 1.6332443405139663, "learning_rate": 8.84893076219177e-06, "loss": 0.6209636926651001, "step": 3179 }, { "epoch": 1.1205286343612335, "grad_norm": 1.7567347030111673, "learning_rate": 8.843143289451673e-06, "loss": 0.8548281192779541, "step": 3180 }, { "epoch": 1.1208810572687224, "grad_norm": 1.742397796953756, "learning_rate": 8.837356209428428e-06, "loss": 0.4621508717536926, "step": 3181 }, { "epoch": 1.1212334801762114, "grad_norm": 1.8553184481302196, "learning_rate": 8.831569524086568e-06, "loss": 0.5065817832946777, "step": 3182 }, { "epoch": 1.1215859030837005, "grad_norm": 1.5532313157641433, "learning_rate": 8.825783235390488e-06, "loss": 0.5467691421508789, "step": 3183 }, { "epoch": 1.1219383259911895, "grad_norm": 1.3786030341795126, "learning_rate": 8.81999734530446e-06, "loss": 0.4938517212867737, "step": 3184 }, { "epoch": 1.1222907488986784, "grad_norm": 1.4972934746199023, "learning_rate": 8.814211855792609e-06, "loss": 0.6125702857971191, "step": 3185 }, { "epoch": 1.1226431718061674, "grad_norm": 1.427476145591487, "learning_rate": 8.80842676881893e-06, "loss": 0.5272841453552246, "step": 3186 }, { "epoch": 1.1229955947136563, "grad_norm": 1.8463623605620603, "learning_rate": 8.802642086347278e-06, "loss": 0.5595715045928955, "step": 3187 }, { "epoch": 1.1233480176211454, "grad_norm": 1.7533827268189746, "learning_rate": 8.796857810341375e-06, "loss": 0.7178677916526794, "step": 3188 }, { "epoch": 1.1237004405286344, "grad_norm": 2.166791630557212, "learning_rate": 8.791073942764806e-06, "loss": 0.6000991463661194, "step": 3189 }, { "epoch": 1.1240528634361233, "grad_norm": 1.7926160729471858, "learning_rate": 8.785290485581008e-06, "loss": 0.537361741065979, "step": 3190 }, { "epoch": 1.1244052863436123, "grad_norm": 1.7666842188914018, "learning_rate": 8.779507440753286e-06, "loss": 0.7135556936264038, "step": 3191 }, { "epoch": 1.1247577092511014, "grad_norm": 1.7053825384185084, "learning_rate": 8.773724810244805e-06, "loss": 0.501063346862793, "step": 3192 }, { "epoch": 1.1251101321585903, "grad_norm": 1.679109568038749, "learning_rate": 8.767942596018587e-06, "loss": 0.6885302662849426, "step": 3193 }, { "epoch": 1.1254625550660793, "grad_norm": 1.321748305255468, "learning_rate": 8.762160800037516e-06, "loss": 0.5902360081672668, "step": 3194 }, { "epoch": 1.1258149779735682, "grad_norm": 1.687654327550192, "learning_rate": 8.75637942426433e-06, "loss": 0.6308953762054443, "step": 3195 }, { "epoch": 1.1261674008810574, "grad_norm": 1.8380657710321036, "learning_rate": 8.750598470661625e-06, "loss": 0.5710124969482422, "step": 3196 }, { "epoch": 1.1265198237885463, "grad_norm": 1.76295044659038, "learning_rate": 8.744817941191862e-06, "loss": 0.6110632419586182, "step": 3197 }, { "epoch": 1.1268722466960353, "grad_norm": 1.7274451742305768, "learning_rate": 8.73903783781734e-06, "loss": 0.5274624824523926, "step": 3198 }, { "epoch": 1.1272246696035242, "grad_norm": 1.549070468504263, "learning_rate": 8.733258162500228e-06, "loss": 0.6144713163375854, "step": 3199 }, { "epoch": 1.1275770925110131, "grad_norm": 1.8001185698886477, "learning_rate": 8.727478917202551e-06, "loss": 0.6404621005058289, "step": 3200 }, { "epoch": 1.1279295154185023, "grad_norm": 1.602548541775438, "learning_rate": 8.721700103886177e-06, "loss": 0.5693025588989258, "step": 3201 }, { "epoch": 1.1282819383259912, "grad_norm": 1.6563446017851289, "learning_rate": 8.715921724512838e-06, "loss": 0.5631159543991089, "step": 3202 }, { "epoch": 1.1286343612334802, "grad_norm": 1.5785191171510689, "learning_rate": 8.710143781044113e-06, "loss": 0.648078441619873, "step": 3203 }, { "epoch": 1.128986784140969, "grad_norm": 2.0721270642934666, "learning_rate": 8.704366275441426e-06, "loss": 0.6858379244804382, "step": 3204 }, { "epoch": 1.1293392070484582, "grad_norm": 1.8203927475030908, "learning_rate": 8.698589209666074e-06, "loss": 0.7244000434875488, "step": 3205 }, { "epoch": 1.1296916299559472, "grad_norm": 1.7775130777760553, "learning_rate": 8.692812585679182e-06, "loss": 0.5918365716934204, "step": 3206 }, { "epoch": 1.1300440528634361, "grad_norm": 1.8950041670387165, "learning_rate": 8.687036405441733e-06, "loss": 0.6893443465232849, "step": 3207 }, { "epoch": 1.130396475770925, "grad_norm": 1.6934464725865028, "learning_rate": 8.681260670914564e-06, "loss": 0.729834794998169, "step": 3208 }, { "epoch": 1.130748898678414, "grad_norm": 1.9278305082183818, "learning_rate": 8.675485384058356e-06, "loss": 0.6525821685791016, "step": 3209 }, { "epoch": 1.1311013215859032, "grad_norm": 1.7892045210081244, "learning_rate": 8.669710546833642e-06, "loss": 0.6799874305725098, "step": 3210 }, { "epoch": 1.131453744493392, "grad_norm": 1.6216385781826248, "learning_rate": 8.6639361612008e-06, "loss": 0.5614932775497437, "step": 3211 }, { "epoch": 1.131806167400881, "grad_norm": 1.6912315117870094, "learning_rate": 8.658162229120045e-06, "loss": 0.5975101590156555, "step": 3212 }, { "epoch": 1.13215859030837, "grad_norm": 1.7352702737909875, "learning_rate": 8.652388752551458e-06, "loss": 0.5367887020111084, "step": 3213 }, { "epoch": 1.1325110132158591, "grad_norm": 1.360358935584503, "learning_rate": 8.646615733454949e-06, "loss": 0.4451865553855896, "step": 3214 }, { "epoch": 1.132863436123348, "grad_norm": 1.8983821913108012, "learning_rate": 8.64084317379028e-06, "loss": 0.6482576131820679, "step": 3215 }, { "epoch": 1.133215859030837, "grad_norm": 1.5858394578763535, "learning_rate": 8.635071075517053e-06, "loss": 0.5890318155288696, "step": 3216 }, { "epoch": 1.133568281938326, "grad_norm": 1.6567929917802857, "learning_rate": 8.629299440594719e-06, "loss": 0.554576575756073, "step": 3217 }, { "epoch": 1.1339207048458149, "grad_norm": 1.6966150183280715, "learning_rate": 8.623528270982567e-06, "loss": 0.5987116694450378, "step": 3218 }, { "epoch": 1.134273127753304, "grad_norm": 1.8696533969224407, "learning_rate": 8.617757568639731e-06, "loss": 0.49857625365257263, "step": 3219 }, { "epoch": 1.134625550660793, "grad_norm": 1.6960564098429034, "learning_rate": 8.61198733552518e-06, "loss": 0.6116641759872437, "step": 3220 }, { "epoch": 1.134977973568282, "grad_norm": 1.6619215502907394, "learning_rate": 8.606217573597738e-06, "loss": 0.4346674978733063, "step": 3221 }, { "epoch": 1.1353303964757708, "grad_norm": 1.6058889875943096, "learning_rate": 8.600448284816046e-06, "loss": 0.6973283290863037, "step": 3222 }, { "epoch": 1.13568281938326, "grad_norm": 1.547791232560021, "learning_rate": 8.594679471138613e-06, "loss": 0.5457896590232849, "step": 3223 }, { "epoch": 1.136035242290749, "grad_norm": 1.6457593373386994, "learning_rate": 8.58891113452376e-06, "loss": 0.4520479440689087, "step": 3224 }, { "epoch": 1.1363876651982379, "grad_norm": 1.6501706928794149, "learning_rate": 8.58314327692966e-06, "loss": 0.6169587969779968, "step": 3225 }, { "epoch": 1.1367400881057268, "grad_norm": 1.729795732302939, "learning_rate": 8.577375900314327e-06, "loss": 0.6398670673370361, "step": 3226 }, { "epoch": 1.1370925110132157, "grad_norm": 1.6846614829900397, "learning_rate": 8.571609006635604e-06, "loss": 0.5772207975387573, "step": 3227 }, { "epoch": 1.137444933920705, "grad_norm": 1.5622430074284195, "learning_rate": 8.565842597851165e-06, "loss": 0.5561503171920776, "step": 3228 }, { "epoch": 1.1377973568281938, "grad_norm": 1.644881271079104, "learning_rate": 8.560076675918537e-06, "loss": 0.4702373743057251, "step": 3229 }, { "epoch": 1.1381497797356828, "grad_norm": 1.778044829497574, "learning_rate": 8.554311242795061e-06, "loss": 0.5967564582824707, "step": 3230 }, { "epoch": 1.138502202643172, "grad_norm": 1.782270527802186, "learning_rate": 8.548546300437928e-06, "loss": 0.4749453663825989, "step": 3231 }, { "epoch": 1.1388546255506609, "grad_norm": 2.2009062727733046, "learning_rate": 8.542781850804155e-06, "loss": 0.6939869523048401, "step": 3232 }, { "epoch": 1.1392070484581498, "grad_norm": 1.4327701228186707, "learning_rate": 8.537017895850593e-06, "loss": 0.5618892908096313, "step": 3233 }, { "epoch": 1.1395594713656387, "grad_norm": 1.6784618730938181, "learning_rate": 8.531254437533925e-06, "loss": 0.6627654433250427, "step": 3234 }, { "epoch": 1.1399118942731277, "grad_norm": 1.770712809653697, "learning_rate": 8.525491477810671e-06, "loss": 0.6365151405334473, "step": 3235 }, { "epoch": 1.1402643171806168, "grad_norm": 1.6623213186798471, "learning_rate": 8.519729018637164e-06, "loss": 0.5207303762435913, "step": 3236 }, { "epoch": 1.1406167400881058, "grad_norm": 1.8240600257881658, "learning_rate": 8.513967061969594e-06, "loss": 0.7469059228897095, "step": 3237 }, { "epoch": 1.1409691629955947, "grad_norm": 1.7786802310337648, "learning_rate": 8.508205609763955e-06, "loss": 0.5778630971908569, "step": 3238 }, { "epoch": 1.1413215859030836, "grad_norm": 1.756406665695002, "learning_rate": 8.502444663976089e-06, "loss": 0.5447480082511902, "step": 3239 }, { "epoch": 1.1416740088105728, "grad_norm": 1.628690443424602, "learning_rate": 8.496684226561653e-06, "loss": 0.6002986431121826, "step": 3240 }, { "epoch": 1.1420264317180617, "grad_norm": 1.7257255594282812, "learning_rate": 8.490924299476133e-06, "loss": 0.7627072930335999, "step": 3241 }, { "epoch": 1.1423788546255507, "grad_norm": 1.725113553289998, "learning_rate": 8.485164884674854e-06, "loss": 0.6406078338623047, "step": 3242 }, { "epoch": 1.1427312775330396, "grad_norm": 2.110533369358698, "learning_rate": 8.479405984112949e-06, "loss": 0.47047436237335205, "step": 3243 }, { "epoch": 1.1430837004405285, "grad_norm": 2.0564519486525903, "learning_rate": 8.473647599745393e-06, "loss": 0.6702529191970825, "step": 3244 }, { "epoch": 1.1434361233480177, "grad_norm": 2.1168699536348488, "learning_rate": 8.467889733526977e-06, "loss": 0.6570258140563965, "step": 3245 }, { "epoch": 1.1437885462555066, "grad_norm": 11.021488641985083, "learning_rate": 8.462132387412312e-06, "loss": 0.6248423457145691, "step": 3246 }, { "epoch": 1.1441409691629956, "grad_norm": 1.6339128666105858, "learning_rate": 8.456375563355842e-06, "loss": 0.7377427816390991, "step": 3247 }, { "epoch": 1.1444933920704845, "grad_norm": 1.8159484011485405, "learning_rate": 8.45061926331183e-06, "loss": 0.6469020843505859, "step": 3248 }, { "epoch": 1.1448458149779737, "grad_norm": 1.81461416151687, "learning_rate": 8.444863489234356e-06, "loss": 0.6417430639266968, "step": 3249 }, { "epoch": 1.1451982378854626, "grad_norm": 1.7715952211280361, "learning_rate": 8.439108243077335e-06, "loss": 0.5447275638580322, "step": 3250 }, { "epoch": 1.1455506607929515, "grad_norm": 1.8341737914542349, "learning_rate": 8.433353526794484e-06, "loss": 0.6621315479278564, "step": 3251 }, { "epoch": 1.1459030837004405, "grad_norm": 1.850872292820976, "learning_rate": 8.42759934233936e-06, "loss": 0.5660392045974731, "step": 3252 }, { "epoch": 1.1462555066079294, "grad_norm": 1.695638018183687, "learning_rate": 8.42184569166532e-06, "loss": 0.43074172735214233, "step": 3253 }, { "epoch": 1.1466079295154186, "grad_norm": 1.6152519611154568, "learning_rate": 8.416092576725554e-06, "loss": 0.5863226056098938, "step": 3254 }, { "epoch": 1.1469603524229075, "grad_norm": 1.8724827582882198, "learning_rate": 8.410339999473067e-06, "loss": 0.6003422737121582, "step": 3255 }, { "epoch": 1.1473127753303964, "grad_norm": 1.806876842860533, "learning_rate": 8.404587961860678e-06, "loss": 0.6109241247177124, "step": 3256 }, { "epoch": 1.1476651982378854, "grad_norm": 1.7768687099142642, "learning_rate": 8.398836465841021e-06, "loss": 0.5749140977859497, "step": 3257 }, { "epoch": 1.1480176211453745, "grad_norm": 1.762377433704451, "learning_rate": 8.393085513366557e-06, "loss": 0.6920739412307739, "step": 3258 }, { "epoch": 1.1483700440528635, "grad_norm": 1.903311052790267, "learning_rate": 8.38733510638955e-06, "loss": 0.6632573008537292, "step": 3259 }, { "epoch": 1.1487224669603524, "grad_norm": 1.925929272799836, "learning_rate": 8.381585246862091e-06, "loss": 0.6396503448486328, "step": 3260 }, { "epoch": 1.1490748898678413, "grad_norm": 4.327872701462553, "learning_rate": 8.375835936736072e-06, "loss": 0.5975937843322754, "step": 3261 }, { "epoch": 1.1494273127753303, "grad_norm": 1.9097739370767552, "learning_rate": 8.370087177963204e-06, "loss": 0.6297920346260071, "step": 3262 }, { "epoch": 1.1497797356828194, "grad_norm": 1.6773858737351708, "learning_rate": 8.364338972495016e-06, "loss": 0.7004375457763672, "step": 3263 }, { "epoch": 1.1501321585903084, "grad_norm": 1.9905333664754346, "learning_rate": 8.358591322282845e-06, "loss": 0.5850871801376343, "step": 3264 }, { "epoch": 1.1504845814977973, "grad_norm": 1.6216139435027066, "learning_rate": 8.352844229277834e-06, "loss": 0.493900865316391, "step": 3265 }, { "epoch": 1.1508370044052865, "grad_norm": 1.8994324319983171, "learning_rate": 8.34709769543095e-06, "loss": 0.573354959487915, "step": 3266 }, { "epoch": 1.1511894273127754, "grad_norm": 2.1672972359364175, "learning_rate": 8.341351722692951e-06, "loss": 0.7154442667961121, "step": 3267 }, { "epoch": 1.1515418502202643, "grad_norm": 1.705511845117997, "learning_rate": 8.335606313014432e-06, "loss": 0.5429074764251709, "step": 3268 }, { "epoch": 1.1518942731277533, "grad_norm": 1.8606068751906144, "learning_rate": 8.329861468345768e-06, "loss": 0.6938891410827637, "step": 3269 }, { "epoch": 1.1522466960352422, "grad_norm": 5.765839224937511, "learning_rate": 8.324117190637157e-06, "loss": 0.7114205360412598, "step": 3270 }, { "epoch": 1.1525991189427314, "grad_norm": 1.761532917196708, "learning_rate": 8.318373481838605e-06, "loss": 0.5353071093559265, "step": 3271 }, { "epoch": 1.1529515418502203, "grad_norm": 1.931038515640054, "learning_rate": 8.312630343899921e-06, "loss": 0.7838516235351562, "step": 3272 }, { "epoch": 1.1533039647577092, "grad_norm": 2.013028743927059, "learning_rate": 8.306887778770724e-06, "loss": 0.630479633808136, "step": 3273 }, { "epoch": 1.1536563876651982, "grad_norm": 1.908388737326531, "learning_rate": 8.301145788400438e-06, "loss": 0.6568116545677185, "step": 3274 }, { "epoch": 1.1540088105726873, "grad_norm": 1.4673620532583986, "learning_rate": 8.295404374738278e-06, "loss": 0.5410804748535156, "step": 3275 }, { "epoch": 1.1543612334801763, "grad_norm": 2.0887831204496017, "learning_rate": 8.289663539733292e-06, "loss": 0.6699862480163574, "step": 3276 }, { "epoch": 1.1547136563876652, "grad_norm": 2.146352543425904, "learning_rate": 8.283923285334304e-06, "loss": 0.6828576326370239, "step": 3277 }, { "epoch": 1.1550660792951541, "grad_norm": 1.6441665475307043, "learning_rate": 8.278183613489951e-06, "loss": 0.5569214820861816, "step": 3278 }, { "epoch": 1.155418502202643, "grad_norm": 1.5736783771881073, "learning_rate": 8.27244452614868e-06, "loss": 0.6276477575302124, "step": 3279 }, { "epoch": 1.1557709251101322, "grad_norm": 1.639795393267647, "learning_rate": 8.266706025258727e-06, "loss": 0.5752792954444885, "step": 3280 }, { "epoch": 1.1561233480176212, "grad_norm": 1.8007170708068962, "learning_rate": 8.260968112768137e-06, "loss": 0.6149388551712036, "step": 3281 }, { "epoch": 1.1564757709251101, "grad_norm": 1.8241425629966381, "learning_rate": 8.255230790624755e-06, "loss": 0.6399196982383728, "step": 3282 }, { "epoch": 1.156828193832599, "grad_norm": 1.8065599712551461, "learning_rate": 8.249494060776215e-06, "loss": 0.6927458047866821, "step": 3283 }, { "epoch": 1.1571806167400882, "grad_norm": 1.5535864037785454, "learning_rate": 8.243757925169968e-06, "loss": 0.5843946933746338, "step": 3284 }, { "epoch": 1.1575330396475771, "grad_norm": 1.7771012211418213, "learning_rate": 8.238022385753248e-06, "loss": 0.6469332575798035, "step": 3285 }, { "epoch": 1.157885462555066, "grad_norm": 1.5500454202505596, "learning_rate": 8.23228744447309e-06, "loss": 0.572630763053894, "step": 3286 }, { "epoch": 1.158237885462555, "grad_norm": 1.7219264264044976, "learning_rate": 8.226553103276335e-06, "loss": 0.6872239112854004, "step": 3287 }, { "epoch": 1.158590308370044, "grad_norm": 1.7206454172461807, "learning_rate": 8.220819364109607e-06, "loss": 0.5116995573043823, "step": 3288 }, { "epoch": 1.1589427312775331, "grad_norm": 1.7411708693012447, "learning_rate": 8.215086228919336e-06, "loss": 0.6179347038269043, "step": 3289 }, { "epoch": 1.159295154185022, "grad_norm": 1.5098995111565061, "learning_rate": 8.209353699651745e-06, "loss": 0.573688805103302, "step": 3290 }, { "epoch": 1.159647577092511, "grad_norm": 1.6209084651188936, "learning_rate": 8.20362177825284e-06, "loss": 0.6622583866119385, "step": 3291 }, { "epoch": 1.16, "grad_norm": 1.9043199482736668, "learning_rate": 8.197890466668441e-06, "loss": 0.4945096969604492, "step": 3292 }, { "epoch": 1.160352422907489, "grad_norm": 1.9477760218669748, "learning_rate": 8.19215976684414e-06, "loss": 0.5657082796096802, "step": 3293 }, { "epoch": 1.160704845814978, "grad_norm": 1.7301213281073105, "learning_rate": 8.186429680725339e-06, "loss": 0.5684623122215271, "step": 3294 }, { "epoch": 1.161057268722467, "grad_norm": 1.7781389678625354, "learning_rate": 8.180700210257223e-06, "loss": 0.567638635635376, "step": 3295 }, { "epoch": 1.1614096916299559, "grad_norm": 1.8973989975016394, "learning_rate": 8.174971357384762e-06, "loss": 0.7182992696762085, "step": 3296 }, { "epoch": 1.1617621145374448, "grad_norm": 1.7202032555937063, "learning_rate": 8.169243124052731e-06, "loss": 0.7188737392425537, "step": 3297 }, { "epoch": 1.162114537444934, "grad_norm": 1.632750713102644, "learning_rate": 8.163515512205687e-06, "loss": 0.5532418489456177, "step": 3298 }, { "epoch": 1.162466960352423, "grad_norm": 2.2725291479645136, "learning_rate": 8.157788523787967e-06, "loss": 0.7167447209358215, "step": 3299 }, { "epoch": 1.1628193832599119, "grad_norm": 1.8053860419209504, "learning_rate": 8.152062160743716e-06, "loss": 0.633411169052124, "step": 3300 }, { "epoch": 1.1631718061674008, "grad_norm": 1.8006555184567121, "learning_rate": 8.146336425016849e-06, "loss": 0.6686321496963501, "step": 3301 }, { "epoch": 1.16352422907489, "grad_norm": 1.884331587638867, "learning_rate": 8.140611318551078e-06, "loss": 0.608701765537262, "step": 3302 }, { "epoch": 1.1638766519823789, "grad_norm": 1.6532674404979102, "learning_rate": 8.1348868432899e-06, "loss": 0.5607466101646423, "step": 3303 }, { "epoch": 1.1642290748898678, "grad_norm": 1.9224536271892947, "learning_rate": 8.12916300117659e-06, "loss": 0.6397457122802734, "step": 3304 }, { "epoch": 1.1645814977973568, "grad_norm": 1.9075190910370474, "learning_rate": 8.123439794154223e-06, "loss": 0.6681507229804993, "step": 3305 }, { "epoch": 1.1649339207048457, "grad_norm": 1.7601065273352539, "learning_rate": 8.117717224165645e-06, "loss": 0.5549972057342529, "step": 3306 }, { "epoch": 1.1652863436123349, "grad_norm": 1.9981914923817063, "learning_rate": 8.111995293153486e-06, "loss": 0.7519058585166931, "step": 3307 }, { "epoch": 1.1656387665198238, "grad_norm": 1.8817978978557874, "learning_rate": 8.106274003060172e-06, "loss": 0.7100121378898621, "step": 3308 }, { "epoch": 1.1659911894273127, "grad_norm": 2.081586750876693, "learning_rate": 8.100553355827897e-06, "loss": 0.6297321319580078, "step": 3309 }, { "epoch": 1.1663436123348019, "grad_norm": 2.2854313216105635, "learning_rate": 8.094833353398645e-06, "loss": 0.6875895857810974, "step": 3310 }, { "epoch": 1.1666960352422908, "grad_norm": 1.7297215389141958, "learning_rate": 8.08911399771418e-06, "loss": 0.5369099974632263, "step": 3311 }, { "epoch": 1.1670484581497798, "grad_norm": 1.7209622601094259, "learning_rate": 8.083395290716042e-06, "loss": 0.5598124265670776, "step": 3312 }, { "epoch": 1.1674008810572687, "grad_norm": 1.6153396072397332, "learning_rate": 8.077677234345557e-06, "loss": 0.6438342332839966, "step": 3313 }, { "epoch": 1.1677533039647576, "grad_norm": 1.649767256033485, "learning_rate": 8.07195983054383e-06, "loss": 0.5558618307113647, "step": 3314 }, { "epoch": 1.1681057268722468, "grad_norm": 1.744681713922102, "learning_rate": 8.06624308125173e-06, "loss": 0.5729602575302124, "step": 3315 }, { "epoch": 1.1684581497797357, "grad_norm": 2.294706401477936, "learning_rate": 8.060526988409929e-06, "loss": 0.5094903707504272, "step": 3316 }, { "epoch": 1.1688105726872247, "grad_norm": 1.6352779890455922, "learning_rate": 8.054811553958853e-06, "loss": 0.6605818867683411, "step": 3317 }, { "epoch": 1.1691629955947136, "grad_norm": 2.240048633930669, "learning_rate": 8.04909677983872e-06, "loss": 0.7929576635360718, "step": 3318 }, { "epoch": 1.1695154185022028, "grad_norm": 1.7445241989865017, "learning_rate": 8.043382667989514e-06, "loss": 0.5915192365646362, "step": 3319 }, { "epoch": 1.1698678414096917, "grad_norm": 1.6537456786938194, "learning_rate": 8.037669220351e-06, "loss": 0.5923853516578674, "step": 3320 }, { "epoch": 1.1702202643171806, "grad_norm": 1.7692219343864357, "learning_rate": 8.031956438862718e-06, "loss": 0.7034223079681396, "step": 3321 }, { "epoch": 1.1705726872246696, "grad_norm": 1.699093684077835, "learning_rate": 8.026244325463975e-06, "loss": 0.6093307733535767, "step": 3322 }, { "epoch": 1.1709251101321585, "grad_norm": 1.820021264359909, "learning_rate": 8.020532882093862e-06, "loss": 0.5709424614906311, "step": 3323 }, { "epoch": 1.1712775330396477, "grad_norm": 1.6327248259933085, "learning_rate": 8.01482211069123e-06, "loss": 0.5242069959640503, "step": 3324 }, { "epoch": 1.1716299559471366, "grad_norm": 1.8755413800206977, "learning_rate": 8.009112013194707e-06, "loss": 0.5869580507278442, "step": 3325 }, { "epoch": 1.1719823788546255, "grad_norm": 1.927667149386539, "learning_rate": 8.0034025915427e-06, "loss": 0.7281460762023926, "step": 3326 }, { "epoch": 1.1723348017621145, "grad_norm": 1.8020991914636244, "learning_rate": 7.997693847673378e-06, "loss": 0.6877723336219788, "step": 3327 }, { "epoch": 1.1726872246696036, "grad_norm": 1.4739994768631006, "learning_rate": 7.991985783524676e-06, "loss": 0.6045002937316895, "step": 3328 }, { "epoch": 1.1730396475770926, "grad_norm": 1.7637996531853402, "learning_rate": 7.986278401034315e-06, "loss": 0.5698690414428711, "step": 3329 }, { "epoch": 1.1733920704845815, "grad_norm": 1.879664532548966, "learning_rate": 7.980571702139759e-06, "loss": 0.6802438497543335, "step": 3330 }, { "epoch": 1.1737444933920704, "grad_norm": 1.9432824884843154, "learning_rate": 7.974865688778271e-06, "loss": 0.5840654373168945, "step": 3331 }, { "epoch": 1.1740969162995594, "grad_norm": 1.7557288678447098, "learning_rate": 7.969160362886855e-06, "loss": 0.5203073024749756, "step": 3332 }, { "epoch": 1.1744493392070485, "grad_norm": 1.5188701776399616, "learning_rate": 7.963455726402292e-06, "loss": 0.4558306932449341, "step": 3333 }, { "epoch": 1.1748017621145375, "grad_norm": 1.8464169088081481, "learning_rate": 7.957751781261132e-06, "loss": 0.6200483441352844, "step": 3334 }, { "epoch": 1.1751541850220264, "grad_norm": 1.4009839443781218, "learning_rate": 7.952048529399686e-06, "loss": 0.559386670589447, "step": 3335 }, { "epoch": 1.1755066079295153, "grad_norm": 1.5776847118393618, "learning_rate": 7.946345972754026e-06, "loss": 0.5521356463432312, "step": 3336 }, { "epoch": 1.1758590308370045, "grad_norm": 1.6725655120909741, "learning_rate": 7.940644113260001e-06, "loss": 0.6235495805740356, "step": 3337 }, { "epoch": 1.1762114537444934, "grad_norm": 1.6364629990686756, "learning_rate": 7.934942952853203e-06, "loss": 0.5196648836135864, "step": 3338 }, { "epoch": 1.1765638766519824, "grad_norm": 1.658819201732712, "learning_rate": 7.929242493469013e-06, "loss": 0.5959422588348389, "step": 3339 }, { "epoch": 1.1769162995594713, "grad_norm": 1.8867606277211662, "learning_rate": 7.923542737042549e-06, "loss": 0.5400167107582092, "step": 3340 }, { "epoch": 1.1772687224669602, "grad_norm": 1.8686352871929341, "learning_rate": 7.917843685508702e-06, "loss": 0.688996434211731, "step": 3341 }, { "epoch": 1.1776211453744494, "grad_norm": 1.844624213320976, "learning_rate": 7.912145340802127e-06, "loss": 0.623216450214386, "step": 3342 }, { "epoch": 1.1779735682819383, "grad_norm": 1.7951119497780943, "learning_rate": 7.906447704857233e-06, "loss": 0.587382435798645, "step": 3343 }, { "epoch": 1.1783259911894273, "grad_norm": 1.4508698182802122, "learning_rate": 7.900750779608187e-06, "loss": 0.6033053398132324, "step": 3344 }, { "epoch": 1.1786784140969162, "grad_norm": 1.5026274052311877, "learning_rate": 7.895054566988924e-06, "loss": 0.557671308517456, "step": 3345 }, { "epoch": 1.1790308370044054, "grad_norm": 1.6193785911353318, "learning_rate": 7.889359068933122e-06, "loss": 0.4550681710243225, "step": 3346 }, { "epoch": 1.1793832599118943, "grad_norm": 1.7532225132073032, "learning_rate": 7.883664287374235e-06, "loss": 0.6417531967163086, "step": 3347 }, { "epoch": 1.1797356828193832, "grad_norm": 2.046641045277204, "learning_rate": 7.877970224245458e-06, "loss": 0.703549861907959, "step": 3348 }, { "epoch": 1.1800881057268722, "grad_norm": 1.9966595548369739, "learning_rate": 7.87227688147975e-06, "loss": 0.7438976764678955, "step": 3349 }, { "epoch": 1.1804405286343613, "grad_norm": 1.9757665254478705, "learning_rate": 7.866584261009823e-06, "loss": 0.5563932657241821, "step": 3350 }, { "epoch": 1.1807929515418503, "grad_norm": 1.9705828017858218, "learning_rate": 7.860892364768145e-06, "loss": 0.6332740783691406, "step": 3351 }, { "epoch": 1.1811453744493392, "grad_norm": 1.6800252042998722, "learning_rate": 7.855201194686938e-06, "loss": 0.5207923650741577, "step": 3352 }, { "epoch": 1.1814977973568281, "grad_norm": 1.704285155728578, "learning_rate": 7.849510752698179e-06, "loss": 0.5930209755897522, "step": 3353 }, { "epoch": 1.1818502202643173, "grad_norm": 1.9626347095192314, "learning_rate": 7.843821040733588e-06, "loss": 0.6207472085952759, "step": 3354 }, { "epoch": 1.1822026431718062, "grad_norm": 1.631891920380694, "learning_rate": 7.838132060724657e-06, "loss": 0.5487867593765259, "step": 3355 }, { "epoch": 1.1825550660792952, "grad_norm": 1.719446635213068, "learning_rate": 7.83244381460261e-06, "loss": 0.5457941889762878, "step": 3356 }, { "epoch": 1.182907488986784, "grad_norm": 4.79087339281713, "learning_rate": 7.826756304298428e-06, "loss": 0.5203769207000732, "step": 3357 }, { "epoch": 1.183259911894273, "grad_norm": 2.2130523974851006, "learning_rate": 7.821069531742848e-06, "loss": 0.7241770029067993, "step": 3358 }, { "epoch": 1.1836123348017622, "grad_norm": 1.872241533824603, "learning_rate": 7.815383498866351e-06, "loss": 0.5085904598236084, "step": 3359 }, { "epoch": 1.1839647577092511, "grad_norm": 1.7457024495825946, "learning_rate": 7.80969820759917e-06, "loss": 0.6219276785850525, "step": 3360 }, { "epoch": 1.18431718061674, "grad_norm": 1.657619548935653, "learning_rate": 7.804013659871286e-06, "loss": 0.5621576309204102, "step": 3361 }, { "epoch": 1.184669603524229, "grad_norm": 2.006942738555184, "learning_rate": 7.798329857612415e-06, "loss": 0.6862529516220093, "step": 3362 }, { "epoch": 1.1850220264317182, "grad_norm": 1.6254700608957282, "learning_rate": 7.792646802752045e-06, "loss": 0.5536706447601318, "step": 3363 }, { "epoch": 1.185374449339207, "grad_norm": 1.8365676060407183, "learning_rate": 7.786964497219389e-06, "loss": 0.7158493995666504, "step": 3364 }, { "epoch": 1.185726872246696, "grad_norm": 1.5882377854785632, "learning_rate": 7.781282942943411e-06, "loss": 0.6510338187217712, "step": 3365 }, { "epoch": 1.186079295154185, "grad_norm": 1.6887309758558333, "learning_rate": 7.775602141852827e-06, "loss": 0.4999651312828064, "step": 3366 }, { "epoch": 1.186431718061674, "grad_norm": 1.7482854003458987, "learning_rate": 7.769922095876088e-06, "loss": 0.566371738910675, "step": 3367 }, { "epoch": 1.186784140969163, "grad_norm": 1.8523910267151578, "learning_rate": 7.764242806941396e-06, "loss": 0.6424880623817444, "step": 3368 }, { "epoch": 1.187136563876652, "grad_norm": 1.7770666290685069, "learning_rate": 7.758564276976696e-06, "loss": 0.6731792688369751, "step": 3369 }, { "epoch": 1.187488986784141, "grad_norm": 1.8284341736993877, "learning_rate": 7.752886507909661e-06, "loss": 0.7350698113441467, "step": 3370 }, { "epoch": 1.1878414096916299, "grad_norm": 1.6211597569244138, "learning_rate": 7.747209501667729e-06, "loss": 0.49212586879730225, "step": 3371 }, { "epoch": 1.188193832599119, "grad_norm": 1.8399284999038652, "learning_rate": 7.741533260178058e-06, "loss": 0.46775591373443604, "step": 3372 }, { "epoch": 1.188546255506608, "grad_norm": 1.9173381710912725, "learning_rate": 7.73585778536756e-06, "loss": 0.7006367444992065, "step": 3373 }, { "epoch": 1.188898678414097, "grad_norm": 1.9011259462553447, "learning_rate": 7.730183079162882e-06, "loss": 0.6403789520263672, "step": 3374 }, { "epoch": 1.1892511013215858, "grad_norm": 1.7192698764020407, "learning_rate": 7.724509143490409e-06, "loss": 0.5788881778717041, "step": 3375 }, { "epoch": 1.1896035242290748, "grad_norm": 1.8160886708158774, "learning_rate": 7.718835980276265e-06, "loss": 0.5216118693351746, "step": 3376 }, { "epoch": 1.189955947136564, "grad_norm": 1.8022868379388808, "learning_rate": 7.713163591446318e-06, "loss": 0.5951248407363892, "step": 3377 }, { "epoch": 1.1903083700440529, "grad_norm": 1.7460515067285554, "learning_rate": 7.707491978926157e-06, "loss": 0.4975050687789917, "step": 3378 }, { "epoch": 1.1906607929515418, "grad_norm": 1.770763460120106, "learning_rate": 7.701821144641127e-06, "loss": 0.6019243001937866, "step": 3379 }, { "epoch": 1.1910132158590307, "grad_norm": 1.7832166509700509, "learning_rate": 7.696151090516292e-06, "loss": 0.6395450830459595, "step": 3380 }, { "epoch": 1.19136563876652, "grad_norm": 1.6347986183513594, "learning_rate": 7.690481818476468e-06, "loss": 0.579787015914917, "step": 3381 }, { "epoch": 1.1917180616740088, "grad_norm": 1.4744637046036069, "learning_rate": 7.684813330446191e-06, "loss": 0.5136005878448486, "step": 3382 }, { "epoch": 1.1920704845814978, "grad_norm": 1.7266158280823927, "learning_rate": 7.679145628349734e-06, "loss": 0.6639782190322876, "step": 3383 }, { "epoch": 1.1924229074889867, "grad_norm": 1.8900727159770023, "learning_rate": 7.673478714111111e-06, "loss": 0.5575984716415405, "step": 3384 }, { "epoch": 1.1927753303964757, "grad_norm": 2.0885094289190658, "learning_rate": 7.667812589654062e-06, "loss": 0.6456045508384705, "step": 3385 }, { "epoch": 1.1931277533039648, "grad_norm": 1.9286041654650978, "learning_rate": 7.662147256902055e-06, "loss": 0.6936196088790894, "step": 3386 }, { "epoch": 1.1934801762114537, "grad_norm": 1.758654368664718, "learning_rate": 7.656482717778299e-06, "loss": 0.5490384697914124, "step": 3387 }, { "epoch": 1.1938325991189427, "grad_norm": 1.9621511017976598, "learning_rate": 7.650818974205727e-06, "loss": 0.6973621845245361, "step": 3388 }, { "epoch": 1.1941850220264318, "grad_norm": 1.835769632858156, "learning_rate": 7.645156028107005e-06, "loss": 0.7471047639846802, "step": 3389 }, { "epoch": 1.1945374449339208, "grad_norm": 1.7902415027725214, "learning_rate": 7.639493881404526e-06, "loss": 0.6205108165740967, "step": 3390 }, { "epoch": 1.1948898678414097, "grad_norm": 1.6920866725907067, "learning_rate": 7.63383253602041e-06, "loss": 0.747038722038269, "step": 3391 }, { "epoch": 1.1952422907488987, "grad_norm": 1.5771320255200836, "learning_rate": 7.628171993876514e-06, "loss": 0.5185794830322266, "step": 3392 }, { "epoch": 1.1955947136563876, "grad_norm": 1.6878325344643712, "learning_rate": 7.6225122568944124e-06, "loss": 0.6059385538101196, "step": 3393 }, { "epoch": 1.1959471365638767, "grad_norm": 1.6275144870635614, "learning_rate": 7.6168533269954045e-06, "loss": 0.5154507160186768, "step": 3394 }, { "epoch": 1.1962995594713657, "grad_norm": 1.8584269669132367, "learning_rate": 7.611195206100529e-06, "loss": 0.684306263923645, "step": 3395 }, { "epoch": 1.1966519823788546, "grad_norm": 1.60676147024925, "learning_rate": 7.605537896130537e-06, "loss": 0.5637205839157104, "step": 3396 }, { "epoch": 1.1970044052863436, "grad_norm": 2.099988274984523, "learning_rate": 7.599881399005913e-06, "loss": 0.700809121131897, "step": 3397 }, { "epoch": 1.1973568281938327, "grad_norm": 1.8285381374549698, "learning_rate": 7.594225716646859e-06, "loss": 0.45139041543006897, "step": 3398 }, { "epoch": 1.1977092511013216, "grad_norm": 1.9616153744225684, "learning_rate": 7.588570850973301e-06, "loss": 0.6623016595840454, "step": 3399 }, { "epoch": 1.1980616740088106, "grad_norm": 1.5510325285611402, "learning_rate": 7.582916803904899e-06, "loss": 0.47430598735809326, "step": 3400 }, { "epoch": 1.1984140969162995, "grad_norm": 1.7180906175268718, "learning_rate": 7.57726357736101e-06, "loss": 0.7190637588500977, "step": 3401 }, { "epoch": 1.1987665198237885, "grad_norm": 1.4703339836450204, "learning_rate": 7.571611173260747e-06, "loss": 0.552079439163208, "step": 3402 }, { "epoch": 1.1991189427312776, "grad_norm": 1.665813020849203, "learning_rate": 7.565959593522914e-06, "loss": 0.5499744415283203, "step": 3403 }, { "epoch": 1.1994713656387666, "grad_norm": 1.6507149154277247, "learning_rate": 7.560308840066046e-06, "loss": 0.6013774871826172, "step": 3404 }, { "epoch": 1.1998237885462555, "grad_norm": 1.5847999964914972, "learning_rate": 7.554658914808404e-06, "loss": 0.5489538908004761, "step": 3405 }, { "epoch": 1.2001762114537444, "grad_norm": 1.72263968265959, "learning_rate": 7.549009819667956e-06, "loss": 0.6124382615089417, "step": 3406 }, { "epoch": 1.2005286343612336, "grad_norm": 2.1073738195754594, "learning_rate": 7.543361556562397e-06, "loss": 0.6895862817764282, "step": 3407 }, { "epoch": 1.2008810572687225, "grad_norm": 2.063900978481081, "learning_rate": 7.537714127409139e-06, "loss": 0.6632197499275208, "step": 3408 }, { "epoch": 1.2012334801762115, "grad_norm": 1.6352648722318401, "learning_rate": 7.5320675341253e-06, "loss": 0.5940145254135132, "step": 3409 }, { "epoch": 1.2015859030837004, "grad_norm": 1.884013328310988, "learning_rate": 7.526421778627735e-06, "loss": 0.646323561668396, "step": 3410 }, { "epoch": 1.2019383259911893, "grad_norm": 1.7070941231545174, "learning_rate": 7.520776862832993e-06, "loss": 0.6173659563064575, "step": 3411 }, { "epoch": 1.2022907488986785, "grad_norm": 1.8582208465763577, "learning_rate": 7.515132788657347e-06, "loss": 0.574191689491272, "step": 3412 }, { "epoch": 1.2026431718061674, "grad_norm": 1.9220370982111243, "learning_rate": 7.50948955801679e-06, "loss": 0.6243089437484741, "step": 3413 }, { "epoch": 1.2029955947136564, "grad_norm": 1.7949632694678572, "learning_rate": 7.503847172827022e-06, "loss": 0.692270040512085, "step": 3414 }, { "epoch": 1.2033480176211453, "grad_norm": 1.6803082040464332, "learning_rate": 7.498205635003451e-06, "loss": 0.5929970145225525, "step": 3415 }, { "epoch": 1.2037004405286345, "grad_norm": 1.6077232593078599, "learning_rate": 7.4925649464612126e-06, "loss": 0.5479272603988647, "step": 3416 }, { "epoch": 1.2040528634361234, "grad_norm": 1.5415384890909907, "learning_rate": 7.486925109115135e-06, "loss": 0.5923635363578796, "step": 3417 }, { "epoch": 1.2044052863436123, "grad_norm": 1.7506756122488851, "learning_rate": 7.48128612487978e-06, "loss": 0.6530192494392395, "step": 3418 }, { "epoch": 1.2047577092511013, "grad_norm": 1.533550542452438, "learning_rate": 7.475647995669397e-06, "loss": 0.5104716420173645, "step": 3419 }, { "epoch": 1.2051101321585902, "grad_norm": 1.8415327152950194, "learning_rate": 7.470010723397958e-06, "loss": 0.6526790261268616, "step": 3420 }, { "epoch": 1.2054625550660794, "grad_norm": 1.746747219195987, "learning_rate": 7.464374309979143e-06, "loss": 0.5985254645347595, "step": 3421 }, { "epoch": 1.2058149779735683, "grad_norm": 1.9679342498420438, "learning_rate": 7.458738757326336e-06, "loss": 0.6575271487236023, "step": 3422 }, { "epoch": 1.2061674008810572, "grad_norm": 1.7353179250025277, "learning_rate": 7.453104067352637e-06, "loss": 0.5906708836555481, "step": 3423 }, { "epoch": 1.2065198237885462, "grad_norm": 1.7518769855954601, "learning_rate": 7.4474702419708465e-06, "loss": 0.7992517352104187, "step": 3424 }, { "epoch": 1.2068722466960353, "grad_norm": 1.7067520122752557, "learning_rate": 7.4418372830934645e-06, "loss": 0.5935543179512024, "step": 3425 }, { "epoch": 1.2072246696035243, "grad_norm": 1.877304862966978, "learning_rate": 7.436205192632719e-06, "loss": 0.7166613340377808, "step": 3426 }, { "epoch": 1.2075770925110132, "grad_norm": 1.7575954983917004, "learning_rate": 7.430573972500519e-06, "loss": 0.5254578590393066, "step": 3427 }, { "epoch": 1.2079295154185021, "grad_norm": 1.7449214411247376, "learning_rate": 7.42494362460849e-06, "loss": 0.6586379408836365, "step": 3428 }, { "epoch": 1.208281938325991, "grad_norm": 1.7864206478373184, "learning_rate": 7.419314150867964e-06, "loss": 0.6960606575012207, "step": 3429 }, { "epoch": 1.2086343612334802, "grad_norm": 1.7557785377406303, "learning_rate": 7.413685553189969e-06, "loss": 0.6107728481292725, "step": 3430 }, { "epoch": 1.2089867841409692, "grad_norm": 1.624755754090177, "learning_rate": 7.408057833485241e-06, "loss": 0.6446499824523926, "step": 3431 }, { "epoch": 1.209339207048458, "grad_norm": 1.9153166988080477, "learning_rate": 7.402430993664216e-06, "loss": 0.7070472240447998, "step": 3432 }, { "epoch": 1.2096916299559473, "grad_norm": 2.004011228140917, "learning_rate": 7.396805035637023e-06, "loss": 0.5919365882873535, "step": 3433 }, { "epoch": 1.2100440528634362, "grad_norm": 1.7861550041093852, "learning_rate": 7.391179961313512e-06, "loss": 0.5975243449211121, "step": 3434 }, { "epoch": 1.2103964757709251, "grad_norm": 1.6863010997131964, "learning_rate": 7.385555772603212e-06, "loss": 0.5772840976715088, "step": 3435 }, { "epoch": 1.210748898678414, "grad_norm": 1.8451401620227157, "learning_rate": 7.379932471415362e-06, "loss": 0.7335072755813599, "step": 3436 }, { "epoch": 1.211101321585903, "grad_norm": 2.0255796426124877, "learning_rate": 7.3743100596589e-06, "loss": 0.6214553713798523, "step": 3437 }, { "epoch": 1.2114537444933922, "grad_norm": 1.8204785128516552, "learning_rate": 7.368688539242457e-06, "loss": 0.6515316963195801, "step": 3438 }, { "epoch": 1.211806167400881, "grad_norm": 1.778475729690813, "learning_rate": 7.3630679120743665e-06, "loss": 0.6479551196098328, "step": 3439 }, { "epoch": 1.21215859030837, "grad_norm": 1.8992442060407408, "learning_rate": 7.357448180062657e-06, "loss": 0.6195069551467896, "step": 3440 }, { "epoch": 1.212511013215859, "grad_norm": 1.8044588174946172, "learning_rate": 7.351829345115047e-06, "loss": 0.5939193964004517, "step": 3441 }, { "epoch": 1.2128634361233481, "grad_norm": 1.7404213735338998, "learning_rate": 7.346211409138964e-06, "loss": 0.6346434354782104, "step": 3442 }, { "epoch": 1.213215859030837, "grad_norm": 1.7854241859310716, "learning_rate": 7.340594374041516e-06, "loss": 0.5924171209335327, "step": 3443 }, { "epoch": 1.213568281938326, "grad_norm": 1.4550427635518266, "learning_rate": 7.334978241729514e-06, "loss": 0.48560285568237305, "step": 3444 }, { "epoch": 1.213920704845815, "grad_norm": 2.0456790867838865, "learning_rate": 7.329363014109463e-06, "loss": 0.643998384475708, "step": 3445 }, { "epoch": 1.2142731277533039, "grad_norm": 1.9340204732587762, "learning_rate": 7.323748693087551e-06, "loss": 0.6041159629821777, "step": 3446 }, { "epoch": 1.214625550660793, "grad_norm": 1.991943883280592, "learning_rate": 7.318135280569674e-06, "loss": 0.7143498659133911, "step": 3447 }, { "epoch": 1.214977973568282, "grad_norm": 1.910490525820005, "learning_rate": 7.312522778461409e-06, "loss": 0.5821564197540283, "step": 3448 }, { "epoch": 1.215330396475771, "grad_norm": 1.9609409525419488, "learning_rate": 7.3069111886680166e-06, "loss": 0.5786745548248291, "step": 3449 }, { "epoch": 1.2156828193832598, "grad_norm": 1.7004659993753848, "learning_rate": 7.3013005130944666e-06, "loss": 0.6740534901618958, "step": 3450 }, { "epoch": 1.216035242290749, "grad_norm": 1.9264837774532027, "learning_rate": 7.2956907536454045e-06, "loss": 0.6353983879089355, "step": 3451 }, { "epoch": 1.216387665198238, "grad_norm": 1.6467978200520468, "learning_rate": 7.290081912225172e-06, "loss": 0.6890027523040771, "step": 3452 }, { "epoch": 1.2167400881057269, "grad_norm": 2.194089687314607, "learning_rate": 7.284473990737795e-06, "loss": 0.6485118269920349, "step": 3453 }, { "epoch": 1.2170925110132158, "grad_norm": 1.8020323615419078, "learning_rate": 7.2788669910869845e-06, "loss": 0.5364162921905518, "step": 3454 }, { "epoch": 1.2174449339207047, "grad_norm": 1.8770204171846867, "learning_rate": 7.27326091517615e-06, "loss": 0.6625754833221436, "step": 3455 }, { "epoch": 1.217797356828194, "grad_norm": 1.9138778572255513, "learning_rate": 7.267655764908374e-06, "loss": 0.7090050578117371, "step": 3456 }, { "epoch": 1.2181497797356828, "grad_norm": 1.7151154871040917, "learning_rate": 7.26205154218643e-06, "loss": 0.6556301116943359, "step": 3457 }, { "epoch": 1.2185022026431718, "grad_norm": 2.12213118759585, "learning_rate": 7.2564482489127815e-06, "loss": 0.7998625636100769, "step": 3458 }, { "epoch": 1.2188546255506607, "grad_norm": 1.8721449700246833, "learning_rate": 7.250845886989568e-06, "loss": 0.6336952447891235, "step": 3459 }, { "epoch": 1.2192070484581499, "grad_norm": 1.7786932342182031, "learning_rate": 7.245244458318621e-06, "loss": 0.5072300434112549, "step": 3460 }, { "epoch": 1.2195594713656388, "grad_norm": 1.9350920817100896, "learning_rate": 7.23964396480145e-06, "loss": 0.6297830939292908, "step": 3461 }, { "epoch": 1.2199118942731277, "grad_norm": 1.7384183002767206, "learning_rate": 7.234044408339243e-06, "loss": 0.5560386180877686, "step": 3462 }, { "epoch": 1.2202643171806167, "grad_norm": 1.7834281461054429, "learning_rate": 7.228445790832885e-06, "loss": 0.5180274844169617, "step": 3463 }, { "epoch": 1.2206167400881056, "grad_norm": 1.5903839847735544, "learning_rate": 7.222848114182926e-06, "loss": 0.4870688319206238, "step": 3464 }, { "epoch": 1.2209691629955948, "grad_norm": 1.5913924611315027, "learning_rate": 7.217251380289602e-06, "loss": 0.46914681792259216, "step": 3465 }, { "epoch": 1.2213215859030837, "grad_norm": 1.6510218664086935, "learning_rate": 7.211655591052833e-06, "loss": 0.5980997085571289, "step": 3466 }, { "epoch": 1.2216740088105726, "grad_norm": 2.0761228855668468, "learning_rate": 7.206060748372212e-06, "loss": 0.5982732772827148, "step": 3467 }, { "epoch": 1.2220264317180616, "grad_norm": 1.5384750193393883, "learning_rate": 7.200466854147019e-06, "loss": 0.612629771232605, "step": 3468 }, { "epoch": 1.2223788546255507, "grad_norm": 1.6776022561511, "learning_rate": 7.194873910276205e-06, "loss": 0.606558084487915, "step": 3469 }, { "epoch": 1.2227312775330397, "grad_norm": 2.093853594654106, "learning_rate": 7.189281918658396e-06, "loss": 0.7133803367614746, "step": 3470 }, { "epoch": 1.2230837004405286, "grad_norm": 1.737492396211302, "learning_rate": 7.183690881191908e-06, "loss": 0.5640908479690552, "step": 3471 }, { "epoch": 1.2234361233480175, "grad_norm": 1.9131350962270206, "learning_rate": 7.178100799774717e-06, "loss": 0.6376210451126099, "step": 3472 }, { "epoch": 1.2237885462555067, "grad_norm": 1.7418892302924867, "learning_rate": 7.172511676304481e-06, "loss": 0.6207184791564941, "step": 3473 }, { "epoch": 1.2241409691629956, "grad_norm": 2.0136397077316133, "learning_rate": 7.166923512678538e-06, "loss": 0.47848421335220337, "step": 3474 }, { "epoch": 1.2244933920704846, "grad_norm": 1.89946756738985, "learning_rate": 7.161336310793894e-06, "loss": 0.6052829027175903, "step": 3475 }, { "epoch": 1.2248458149779735, "grad_norm": 1.968672987503914, "learning_rate": 7.155750072547229e-06, "loss": 0.6050940155982971, "step": 3476 }, { "epoch": 1.2251982378854627, "grad_norm": 2.566995671782078, "learning_rate": 7.150164799834902e-06, "loss": 0.6121659278869629, "step": 3477 }, { "epoch": 1.2255506607929516, "grad_norm": 1.9679344001124786, "learning_rate": 7.144580494552929e-06, "loss": 0.6886739730834961, "step": 3478 }, { "epoch": 1.2259030837004405, "grad_norm": 1.5760234299307694, "learning_rate": 7.13899715859702e-06, "loss": 0.5001103281974792, "step": 3479 }, { "epoch": 1.2262555066079295, "grad_norm": 2.1260048612910216, "learning_rate": 7.133414793862532e-06, "loss": 0.5948734283447266, "step": 3480 }, { "epoch": 1.2266079295154184, "grad_norm": 2.593831579740968, "learning_rate": 7.127833402244515e-06, "loss": 0.6179298162460327, "step": 3481 }, { "epoch": 1.2269603524229076, "grad_norm": 1.6926296837265904, "learning_rate": 7.122252985637672e-06, "loss": 0.5543676614761353, "step": 3482 }, { "epoch": 1.2273127753303965, "grad_norm": 1.6008632106545562, "learning_rate": 7.116673545936379e-06, "loss": 0.6279658079147339, "step": 3483 }, { "epoch": 1.2276651982378854, "grad_norm": 1.5383086530060461, "learning_rate": 7.111095085034687e-06, "loss": 0.6692230701446533, "step": 3484 }, { "epoch": 1.2280176211453744, "grad_norm": 1.7218507243355061, "learning_rate": 7.1055176048263085e-06, "loss": 0.6124502420425415, "step": 3485 }, { "epoch": 1.2283700440528635, "grad_norm": 2.0325469007846007, "learning_rate": 7.09994110720462e-06, "loss": 0.6241810321807861, "step": 3486 }, { "epoch": 1.2287224669603525, "grad_norm": 1.7620353767255947, "learning_rate": 7.094365594062675e-06, "loss": 0.6556589603424072, "step": 3487 }, { "epoch": 1.2290748898678414, "grad_norm": 1.660185756567605, "learning_rate": 7.0887910672931815e-06, "loss": 0.480433851480484, "step": 3488 }, { "epoch": 1.2294273127753303, "grad_norm": 1.7666817554476708, "learning_rate": 7.083217528788524e-06, "loss": 0.6198803782463074, "step": 3489 }, { "epoch": 1.2297797356828193, "grad_norm": 1.7945939958355666, "learning_rate": 7.077644980440741e-06, "loss": 0.6368751525878906, "step": 3490 }, { "epoch": 1.2301321585903084, "grad_norm": 1.904999974616483, "learning_rate": 7.072073424141538e-06, "loss": 0.5992522239685059, "step": 3491 }, { "epoch": 1.2304845814977974, "grad_norm": 1.6441410368294835, "learning_rate": 7.066502861782289e-06, "loss": 0.5917885303497314, "step": 3492 }, { "epoch": 1.2308370044052863, "grad_norm": 1.9090985571817867, "learning_rate": 7.060933295254027e-06, "loss": 0.615925669670105, "step": 3493 }, { "epoch": 1.2311894273127753, "grad_norm": 1.5510149338562214, "learning_rate": 7.055364726447437e-06, "loss": 0.4408820867538452, "step": 3494 }, { "epoch": 1.2315418502202644, "grad_norm": 1.706805010144051, "learning_rate": 7.049797157252889e-06, "loss": 0.4918386936187744, "step": 3495 }, { "epoch": 1.2318942731277533, "grad_norm": 2.0047166519470965, "learning_rate": 7.0442305895603844e-06, "loss": 0.6964970827102661, "step": 3496 }, { "epoch": 1.2322466960352423, "grad_norm": 1.993882373770559, "learning_rate": 7.038665025259615e-06, "loss": 0.5269606113433838, "step": 3497 }, { "epoch": 1.2325991189427312, "grad_norm": 1.7338430673292662, "learning_rate": 7.033100466239908e-06, "loss": 0.6146842241287231, "step": 3498 }, { "epoch": 1.2329515418502202, "grad_norm": 1.8958783101408965, "learning_rate": 7.027536914390257e-06, "loss": 0.7163739800453186, "step": 3499 }, { "epoch": 1.2333039647577093, "grad_norm": 1.5575657818438158, "learning_rate": 7.021974371599318e-06, "loss": 0.5851477980613708, "step": 3500 }, { "epoch": 1.2336563876651983, "grad_norm": 1.3831914970718109, "learning_rate": 7.0164128397554e-06, "loss": 0.585768461227417, "step": 3501 }, { "epoch": 1.2340088105726872, "grad_norm": 1.651121323438745, "learning_rate": 7.0108523207464706e-06, "loss": 0.5467718839645386, "step": 3502 }, { "epoch": 1.2343612334801761, "grad_norm": 1.8179588757324485, "learning_rate": 7.0052928164601564e-06, "loss": 0.638299822807312, "step": 3503 }, { "epoch": 1.2347136563876653, "grad_norm": 1.8158584952636452, "learning_rate": 6.9997343287837275e-06, "loss": 0.6737650036811829, "step": 3504 }, { "epoch": 1.2350660792951542, "grad_norm": 1.7619528960945736, "learning_rate": 6.9941768596041224e-06, "loss": 0.6659837961196899, "step": 3505 }, { "epoch": 1.2354185022026432, "grad_norm": 1.9059656133131788, "learning_rate": 6.988620410807932e-06, "loss": 0.6731020212173462, "step": 3506 }, { "epoch": 1.235770925110132, "grad_norm": 1.8111638058637756, "learning_rate": 6.983064984281389e-06, "loss": 0.6236598491668701, "step": 3507 }, { "epoch": 1.236123348017621, "grad_norm": 1.8485171900570894, "learning_rate": 6.9775105819103985e-06, "loss": 0.6233193874359131, "step": 3508 }, { "epoch": 1.2364757709251102, "grad_norm": 1.7456936175280036, "learning_rate": 6.971957205580497e-06, "loss": 0.5914918184280396, "step": 3509 }, { "epoch": 1.2368281938325991, "grad_norm": 2.069060854376664, "learning_rate": 6.966404857176893e-06, "loss": 0.6576484441757202, "step": 3510 }, { "epoch": 1.237180616740088, "grad_norm": 1.6371442891988068, "learning_rate": 6.960853538584431e-06, "loss": 0.5609208941459656, "step": 3511 }, { "epoch": 1.2375330396475772, "grad_norm": 1.8336206343046235, "learning_rate": 6.955303251687609e-06, "loss": 0.6405455470085144, "step": 3512 }, { "epoch": 1.2378854625550662, "grad_norm": 1.6981959386126726, "learning_rate": 6.949753998370579e-06, "loss": 0.5621844530105591, "step": 3513 }, { "epoch": 1.238237885462555, "grad_norm": 1.6040361718583698, "learning_rate": 6.944205780517138e-06, "loss": 0.5674207210540771, "step": 3514 }, { "epoch": 1.238590308370044, "grad_norm": 1.8089615708578142, "learning_rate": 6.938658600010734e-06, "loss": 0.6744752526283264, "step": 3515 }, { "epoch": 1.238942731277533, "grad_norm": 1.851260674535246, "learning_rate": 6.9331124587344655e-06, "loss": 0.537495493888855, "step": 3516 }, { "epoch": 1.2392951541850221, "grad_norm": 1.7599394880527937, "learning_rate": 6.92756735857107e-06, "loss": 0.8405104875564575, "step": 3517 }, { "epoch": 1.239647577092511, "grad_norm": 1.7838209985249966, "learning_rate": 6.92202330140294e-06, "loss": 0.6751723885536194, "step": 3518 }, { "epoch": 1.24, "grad_norm": 1.8012761946666955, "learning_rate": 6.9164802891121105e-06, "loss": 0.5763178467750549, "step": 3519 }, { "epoch": 1.240352422907489, "grad_norm": 1.7859481797599979, "learning_rate": 6.910938323580256e-06, "loss": 0.7713793516159058, "step": 3520 }, { "epoch": 1.240704845814978, "grad_norm": 2.0598557028652356, "learning_rate": 6.90539740668871e-06, "loss": 0.6354435682296753, "step": 3521 }, { "epoch": 1.241057268722467, "grad_norm": 1.6780280463346202, "learning_rate": 6.899857540318434e-06, "loss": 0.5121721625328064, "step": 3522 }, { "epoch": 1.241409691629956, "grad_norm": 1.8470903920827393, "learning_rate": 6.894318726350042e-06, "loss": 0.586428165435791, "step": 3523 }, { "epoch": 1.241762114537445, "grad_norm": 1.690234288859414, "learning_rate": 6.888780966663792e-06, "loss": 0.4868311285972595, "step": 3524 }, { "epoch": 1.2421145374449338, "grad_norm": 1.7688170320163026, "learning_rate": 6.883244263139578e-06, "loss": 0.7057775259017944, "step": 3525 }, { "epoch": 1.242466960352423, "grad_norm": 1.630207980484645, "learning_rate": 6.877708617656942e-06, "loss": 0.4993360638618469, "step": 3526 }, { "epoch": 1.242819383259912, "grad_norm": 1.7093781024880734, "learning_rate": 6.872174032095061e-06, "loss": 0.6096793413162231, "step": 3527 }, { "epoch": 1.2431718061674009, "grad_norm": 1.7005141830755592, "learning_rate": 6.866640508332751e-06, "loss": 0.584385871887207, "step": 3528 }, { "epoch": 1.2435242290748898, "grad_norm": 1.6033098221924098, "learning_rate": 6.861108048248477e-06, "loss": 0.5857449173927307, "step": 3529 }, { "epoch": 1.243876651982379, "grad_norm": 1.6447411339873705, "learning_rate": 6.855576653720333e-06, "loss": 0.4337875247001648, "step": 3530 }, { "epoch": 1.244229074889868, "grad_norm": 1.924557656954366, "learning_rate": 6.850046326626058e-06, "loss": 0.6949163675308228, "step": 3531 }, { "epoch": 1.2445814977973568, "grad_norm": 2.029468434582643, "learning_rate": 6.844517068843025e-06, "loss": 0.5876098871231079, "step": 3532 }, { "epoch": 1.2449339207048458, "grad_norm": 2.0143379278356153, "learning_rate": 6.838988882248243e-06, "loss": 0.5460488796234131, "step": 3533 }, { "epoch": 1.2452863436123347, "grad_norm": 2.284896657447092, "learning_rate": 6.833461768718365e-06, "loss": 0.6500875949859619, "step": 3534 }, { "epoch": 1.2456387665198239, "grad_norm": 1.9702281980181484, "learning_rate": 6.82793573012967e-06, "loss": 0.6504626274108887, "step": 3535 }, { "epoch": 1.2459911894273128, "grad_norm": 1.8635901517060365, "learning_rate": 6.822410768358072e-06, "loss": 0.6881722211837769, "step": 3536 }, { "epoch": 1.2463436123348017, "grad_norm": 1.7111090644899583, "learning_rate": 6.816886885279132e-06, "loss": 0.6747599840164185, "step": 3537 }, { "epoch": 1.2466960352422907, "grad_norm": 2.61809094535544, "learning_rate": 6.811364082768028e-06, "loss": 0.5987570285797119, "step": 3538 }, { "epoch": 1.2470484581497798, "grad_norm": 1.8641726073707956, "learning_rate": 6.8058423626995885e-06, "loss": 0.6614603996276855, "step": 3539 }, { "epoch": 1.2474008810572688, "grad_norm": 1.5529990518062367, "learning_rate": 6.80032172694826e-06, "loss": 0.542367696762085, "step": 3540 }, { "epoch": 1.2477533039647577, "grad_norm": 1.7771584963866378, "learning_rate": 6.7948021773881235e-06, "loss": 0.6200593709945679, "step": 3541 }, { "epoch": 1.2481057268722466, "grad_norm": 1.896811225090905, "learning_rate": 6.789283715892905e-06, "loss": 0.6425306797027588, "step": 3542 }, { "epoch": 1.2484581497797356, "grad_norm": 1.4798584901842344, "learning_rate": 6.78376634433594e-06, "loss": 0.5277592539787292, "step": 3543 }, { "epoch": 1.2488105726872247, "grad_norm": 1.8357663435279958, "learning_rate": 6.778250064590206e-06, "loss": 0.6120523810386658, "step": 3544 }, { "epoch": 1.2491629955947137, "grad_norm": 2.0042129559914654, "learning_rate": 6.772734878528313e-06, "loss": 0.538428544998169, "step": 3545 }, { "epoch": 1.2495154185022026, "grad_norm": 1.7456851140249008, "learning_rate": 6.76722078802249e-06, "loss": 0.6439732909202576, "step": 3546 }, { "epoch": 1.2498678414096915, "grad_norm": 1.5580174742798336, "learning_rate": 6.761707794944605e-06, "loss": 0.5951697826385498, "step": 3547 }, { "epoch": 1.2502202643171807, "grad_norm": 1.5461650468928614, "learning_rate": 6.7561959011661456e-06, "loss": 0.5548606514930725, "step": 3548 }, { "epoch": 1.2505726872246696, "grad_norm": 1.936721806656616, "learning_rate": 6.750685108558221e-06, "loss": 0.4768974781036377, "step": 3549 }, { "epoch": 1.2509251101321586, "grad_norm": 1.6130866640641843, "learning_rate": 6.745175418991585e-06, "loss": 0.6629552245140076, "step": 3550 }, { "epoch": 1.2512775330396475, "grad_norm": 1.8826604922139925, "learning_rate": 6.739666834336599e-06, "loss": 0.6550329923629761, "step": 3551 }, { "epoch": 1.2516299559471364, "grad_norm": 1.7091222991512534, "learning_rate": 6.734159356463254e-06, "loss": 0.37340015172958374, "step": 3552 }, { "epoch": 1.2519823788546256, "grad_norm": 2.0454082069330424, "learning_rate": 6.728652987241175e-06, "loss": 0.6343201398849487, "step": 3553 }, { "epoch": 1.2523348017621145, "grad_norm": 1.8938201811077042, "learning_rate": 6.723147728539596e-06, "loss": 0.7555221319198608, "step": 3554 }, { "epoch": 1.2526872246696035, "grad_norm": 1.7356069524639768, "learning_rate": 6.717643582227384e-06, "loss": 0.5944523215293884, "step": 3555 }, { "epoch": 1.2530396475770926, "grad_norm": 1.627279375354834, "learning_rate": 6.71214055017303e-06, "loss": 0.5686212778091431, "step": 3556 }, { "epoch": 1.2533920704845816, "grad_norm": 1.697482530075543, "learning_rate": 6.706638634244629e-06, "loss": 0.6401857137680054, "step": 3557 }, { "epoch": 1.2537444933920705, "grad_norm": 1.5933991655989903, "learning_rate": 6.701137836309926e-06, "loss": 0.4571516513824463, "step": 3558 }, { "epoch": 1.2540969162995594, "grad_norm": 1.7606001647916119, "learning_rate": 6.695638158236255e-06, "loss": 0.5857570171356201, "step": 3559 }, { "epoch": 1.2544493392070484, "grad_norm": 1.7187772621235449, "learning_rate": 6.690139601890601e-06, "loss": 0.6981472969055176, "step": 3560 }, { "epoch": 1.2548017621145373, "grad_norm": 1.685629147285753, "learning_rate": 6.684642169139544e-06, "loss": 0.5120254755020142, "step": 3561 }, { "epoch": 1.2551541850220265, "grad_norm": 2.043587366608814, "learning_rate": 6.67914586184929e-06, "loss": 0.6975923776626587, "step": 3562 }, { "epoch": 1.2555066079295154, "grad_norm": 2.1694224742588233, "learning_rate": 6.673650681885668e-06, "loss": 0.5825072526931763, "step": 3563 }, { "epoch": 1.2558590308370043, "grad_norm": 1.9388578444875513, "learning_rate": 6.668156631114124e-06, "loss": 0.5701749324798584, "step": 3564 }, { "epoch": 1.2562114537444935, "grad_norm": 1.6715281124187895, "learning_rate": 6.662663711399705e-06, "loss": 0.5230482220649719, "step": 3565 }, { "epoch": 1.2565638766519824, "grad_norm": 1.7540798103539514, "learning_rate": 6.657171924607102e-06, "loss": 0.6680361032485962, "step": 3566 }, { "epoch": 1.2569162995594714, "grad_norm": 1.7792330481880054, "learning_rate": 6.651681272600592e-06, "loss": 0.6745159029960632, "step": 3567 }, { "epoch": 1.2572687224669603, "grad_norm": 1.5777367956881352, "learning_rate": 6.646191757244089e-06, "loss": 0.587162971496582, "step": 3568 }, { "epoch": 1.2576211453744492, "grad_norm": 2.0091715660610183, "learning_rate": 6.640703380401111e-06, "loss": 0.6170785427093506, "step": 3569 }, { "epoch": 1.2579735682819384, "grad_norm": 1.8496931248102404, "learning_rate": 6.6352161439347875e-06, "loss": 0.4955494999885559, "step": 3570 }, { "epoch": 1.2583259911894273, "grad_norm": 1.8039519732213443, "learning_rate": 6.62973004970787e-06, "loss": 0.7183424234390259, "step": 3571 }, { "epoch": 1.2586784140969163, "grad_norm": 1.6920151696252388, "learning_rate": 6.624245099582713e-06, "loss": 0.6266030669212341, "step": 3572 }, { "epoch": 1.2590308370044052, "grad_norm": 1.8260182971737482, "learning_rate": 6.6187612954212845e-06, "loss": 0.5234469175338745, "step": 3573 }, { "epoch": 1.2593832599118944, "grad_norm": 2.0762206956902234, "learning_rate": 6.6132786390851725e-06, "loss": 0.7066231966018677, "step": 3574 }, { "epoch": 1.2597356828193833, "grad_norm": 1.8486791061565373, "learning_rate": 6.60779713243556e-06, "loss": 0.622086226940155, "step": 3575 }, { "epoch": 1.2600881057268722, "grad_norm": 2.003110770323092, "learning_rate": 6.6023167773332554e-06, "loss": 0.6607370376586914, "step": 3576 }, { "epoch": 1.2604405286343612, "grad_norm": 1.9512971078148649, "learning_rate": 6.596837575638663e-06, "loss": 0.6846165657043457, "step": 3577 }, { "epoch": 1.2607929515418501, "grad_norm": 2.1137757907106574, "learning_rate": 6.5913595292118024e-06, "loss": 0.6329103708267212, "step": 3578 }, { "epoch": 1.2611453744493393, "grad_norm": 1.7067433363159659, "learning_rate": 6.585882639912302e-06, "loss": 0.7942261695861816, "step": 3579 }, { "epoch": 1.2614977973568282, "grad_norm": 1.923592126322299, "learning_rate": 6.580406909599393e-06, "loss": 0.5446548461914062, "step": 3580 }, { "epoch": 1.2618502202643171, "grad_norm": 2.584270827853736, "learning_rate": 6.574932340131917e-06, "loss": 0.581193208694458, "step": 3581 }, { "epoch": 1.2622026431718063, "grad_norm": 1.789761494779322, "learning_rate": 6.569458933368323e-06, "loss": 0.6099729537963867, "step": 3582 }, { "epoch": 1.2625550660792952, "grad_norm": 1.7689292642576144, "learning_rate": 6.563986691166655e-06, "loss": 0.45215970277786255, "step": 3583 }, { "epoch": 1.2629074889867842, "grad_norm": 1.9037008934232844, "learning_rate": 6.558515615384573e-06, "loss": 0.6674731969833374, "step": 3584 }, { "epoch": 1.2632599118942731, "grad_norm": 1.4782940862298068, "learning_rate": 6.553045707879338e-06, "loss": 0.4951098561286926, "step": 3585 }, { "epoch": 1.263612334801762, "grad_norm": 1.7852149202748289, "learning_rate": 6.54757697050781e-06, "loss": 0.5853816270828247, "step": 3586 }, { "epoch": 1.263964757709251, "grad_norm": 1.5907197274079232, "learning_rate": 6.5421094051264575e-06, "loss": 0.5236951112747192, "step": 3587 }, { "epoch": 1.2643171806167401, "grad_norm": 1.733068587169355, "learning_rate": 6.536643013591347e-06, "loss": 0.5717612504959106, "step": 3588 }, { "epoch": 1.264669603524229, "grad_norm": 2.033496211612474, "learning_rate": 6.531177797758155e-06, "loss": 0.6144098043441772, "step": 3589 }, { "epoch": 1.265022026431718, "grad_norm": 1.6355266077439052, "learning_rate": 6.525713759482144e-06, "loss": 0.5634705424308777, "step": 3590 }, { "epoch": 1.2653744493392072, "grad_norm": 1.7147225194337798, "learning_rate": 6.520250900618186e-06, "loss": 0.582956075668335, "step": 3591 }, { "epoch": 1.265726872246696, "grad_norm": 1.843768096592032, "learning_rate": 6.514789223020754e-06, "loss": 0.7649297714233398, "step": 3592 }, { "epoch": 1.266079295154185, "grad_norm": 1.6261733555902604, "learning_rate": 6.509328728543918e-06, "loss": 0.6035098433494568, "step": 3593 }, { "epoch": 1.266431718061674, "grad_norm": 1.8493319579504743, "learning_rate": 6.503869419041344e-06, "loss": 0.6405705809593201, "step": 3594 }, { "epoch": 1.266784140969163, "grad_norm": 2.26304309310324, "learning_rate": 6.498411296366299e-06, "loss": 0.674353301525116, "step": 3595 }, { "epoch": 1.2671365638766519, "grad_norm": 1.7621656180677492, "learning_rate": 6.492954362371644e-06, "loss": 0.6018465757369995, "step": 3596 }, { "epoch": 1.267488986784141, "grad_norm": 2.127137234030612, "learning_rate": 6.487498618909845e-06, "loss": 0.6491270065307617, "step": 3597 }, { "epoch": 1.26784140969163, "grad_norm": 1.6636292273445474, "learning_rate": 6.4820440678329474e-06, "loss": 0.5126988887786865, "step": 3598 }, { "epoch": 1.2681938325991189, "grad_norm": 1.7884980833676332, "learning_rate": 6.476590710992605e-06, "loss": 0.5931694507598877, "step": 3599 }, { "epoch": 1.268546255506608, "grad_norm": 1.9386898901162777, "learning_rate": 6.471138550240066e-06, "loss": 0.5455423593521118, "step": 3600 }, { "epoch": 1.268898678414097, "grad_norm": 1.6361281925349132, "learning_rate": 6.465687587426166e-06, "loss": 0.4870053231716156, "step": 3601 }, { "epoch": 1.269251101321586, "grad_norm": 1.9069149245463006, "learning_rate": 6.460237824401337e-06, "loss": 0.6434903144836426, "step": 3602 }, { "epoch": 1.2696035242290749, "grad_norm": 1.676899060774639, "learning_rate": 6.454789263015609e-06, "loss": 0.6256476640701294, "step": 3603 }, { "epoch": 1.2699559471365638, "grad_norm": 1.8004511475353204, "learning_rate": 6.449341905118589e-06, "loss": 0.6304135322570801, "step": 3604 }, { "epoch": 1.2703083700440527, "grad_norm": 1.9009929525157667, "learning_rate": 6.443895752559498e-06, "loss": 0.5315194725990295, "step": 3605 }, { "epoch": 1.2706607929515419, "grad_norm": 1.4321615697348329, "learning_rate": 6.438450807187127e-06, "loss": 0.5232852697372437, "step": 3606 }, { "epoch": 1.2710132158590308, "grad_norm": 1.6584356511216338, "learning_rate": 6.433007070849863e-06, "loss": 0.4462543725967407, "step": 3607 }, { "epoch": 1.2713656387665198, "grad_norm": 1.6730765460300174, "learning_rate": 6.4275645453956945e-06, "loss": 0.6347709894180298, "step": 3608 }, { "epoch": 1.271718061674009, "grad_norm": 1.625329738549371, "learning_rate": 6.422123232672182e-06, "loss": 0.5277259349822998, "step": 3609 }, { "epoch": 1.2720704845814979, "grad_norm": 1.7954090025098361, "learning_rate": 6.416683134526486e-06, "loss": 0.6297650933265686, "step": 3610 }, { "epoch": 1.2724229074889868, "grad_norm": 1.7743916636003476, "learning_rate": 6.411244252805351e-06, "loss": 0.503609836101532, "step": 3611 }, { "epoch": 1.2727753303964757, "grad_norm": 1.7300375262211753, "learning_rate": 6.405806589355099e-06, "loss": 0.6026735305786133, "step": 3612 }, { "epoch": 1.2731277533039647, "grad_norm": 1.543883502597784, "learning_rate": 6.400370146021662e-06, "loss": 0.4918368458747864, "step": 3613 }, { "epoch": 1.2734801762114538, "grad_norm": 2.125830682883153, "learning_rate": 6.394934924650532e-06, "loss": 0.6215550899505615, "step": 3614 }, { "epoch": 1.2738325991189428, "grad_norm": 2.1843858701221563, "learning_rate": 6.389500927086801e-06, "loss": 0.6979820728302002, "step": 3615 }, { "epoch": 1.2741850220264317, "grad_norm": 1.9168565956279218, "learning_rate": 6.384068155175143e-06, "loss": 0.5661836266517639, "step": 3616 }, { "epoch": 1.2745374449339206, "grad_norm": 2.2497484972303896, "learning_rate": 6.378636610759812e-06, "loss": 0.699792742729187, "step": 3617 }, { "epoch": 1.2748898678414098, "grad_norm": 2.1298001613626765, "learning_rate": 6.373206295684653e-06, "loss": 0.6418631076812744, "step": 3618 }, { "epoch": 1.2752422907488987, "grad_norm": 1.639324838954067, "learning_rate": 6.3677772117930895e-06, "loss": 0.4975489675998688, "step": 3619 }, { "epoch": 1.2755947136563877, "grad_norm": 1.6787243090627195, "learning_rate": 6.362349360928117e-06, "loss": 0.5621567964553833, "step": 3620 }, { "epoch": 1.2759471365638766, "grad_norm": 1.9441609125211634, "learning_rate": 6.356922744932335e-06, "loss": 0.538573682308197, "step": 3621 }, { "epoch": 1.2762995594713655, "grad_norm": 1.8099521315485383, "learning_rate": 6.351497365647903e-06, "loss": 0.5726763010025024, "step": 3622 }, { "epoch": 1.2766519823788547, "grad_norm": 1.509968688666824, "learning_rate": 6.346073224916565e-06, "loss": 0.5911343097686768, "step": 3623 }, { "epoch": 1.2770044052863436, "grad_norm": 1.8960352229890238, "learning_rate": 6.340650324579658e-06, "loss": 0.6181383728981018, "step": 3624 }, { "epoch": 1.2773568281938326, "grad_norm": 1.8065087463718459, "learning_rate": 6.3352286664780785e-06, "loss": 0.5941140651702881, "step": 3625 }, { "epoch": 1.2777092511013217, "grad_norm": 1.980034412220703, "learning_rate": 6.329808252452316e-06, "loss": 0.7604472637176514, "step": 3626 }, { "epoch": 1.2780616740088107, "grad_norm": 1.7265138262893938, "learning_rate": 6.324389084342435e-06, "loss": 0.6063867211341858, "step": 3627 }, { "epoch": 1.2784140969162996, "grad_norm": 1.8844241099487, "learning_rate": 6.3189711639880644e-06, "loss": 0.7202302813529968, "step": 3628 }, { "epoch": 1.2787665198237885, "grad_norm": 1.7295127580755116, "learning_rate": 6.313554493228431e-06, "loss": 0.5934856534004211, "step": 3629 }, { "epoch": 1.2791189427312775, "grad_norm": 1.7905829637835577, "learning_rate": 6.3081390739023175e-06, "loss": 0.6403088569641113, "step": 3630 }, { "epoch": 1.2794713656387664, "grad_norm": 1.9400757232043577, "learning_rate": 6.302724907848096e-06, "loss": 0.6679831743240356, "step": 3631 }, { "epoch": 1.2798237885462556, "grad_norm": 1.9107919043768602, "learning_rate": 6.297311996903703e-06, "loss": 0.6914902329444885, "step": 3632 }, { "epoch": 1.2801762114537445, "grad_norm": 1.4865016000129294, "learning_rate": 6.2919003429066535e-06, "loss": 0.5391600131988525, "step": 3633 }, { "epoch": 1.2805286343612334, "grad_norm": 1.7774288854868727, "learning_rate": 6.286489947694041e-06, "loss": 0.5740962028503418, "step": 3634 }, { "epoch": 1.2808810572687226, "grad_norm": 1.9144175178404335, "learning_rate": 6.281080813102523e-06, "loss": 0.6497045159339905, "step": 3635 }, { "epoch": 1.2812334801762115, "grad_norm": 1.6649274023798961, "learning_rate": 6.275672940968326e-06, "loss": 0.5481048226356506, "step": 3636 }, { "epoch": 1.2815859030837005, "grad_norm": 1.6547388155087517, "learning_rate": 6.270266333127266e-06, "loss": 0.5412508249282837, "step": 3637 }, { "epoch": 1.2819383259911894, "grad_norm": 1.8289845737684471, "learning_rate": 6.264860991414709e-06, "loss": 0.5055446624755859, "step": 3638 }, { "epoch": 1.2822907488986783, "grad_norm": 1.9772143213144648, "learning_rate": 6.259456917665605e-06, "loss": 0.6073929071426392, "step": 3639 }, { "epoch": 1.2826431718061673, "grad_norm": 1.6297327309789957, "learning_rate": 6.254054113714467e-06, "loss": 0.5277928113937378, "step": 3640 }, { "epoch": 1.2829955947136564, "grad_norm": 1.7440990717646376, "learning_rate": 6.248652581395378e-06, "loss": 0.5106299519538879, "step": 3641 }, { "epoch": 1.2833480176211454, "grad_norm": 1.612143250274434, "learning_rate": 6.243252322541993e-06, "loss": 0.485049843788147, "step": 3642 }, { "epoch": 1.2837004405286343, "grad_norm": 2.0115453178937894, "learning_rate": 6.237853338987532e-06, "loss": 0.5899066925048828, "step": 3643 }, { "epoch": 1.2840528634361235, "grad_norm": 1.6956228425038977, "learning_rate": 6.2324556325647745e-06, "loss": 0.5761981010437012, "step": 3644 }, { "epoch": 1.2844052863436124, "grad_norm": 1.732932337254408, "learning_rate": 6.227059205106085e-06, "loss": 0.6288208961486816, "step": 3645 }, { "epoch": 1.2847577092511013, "grad_norm": 1.7671756166643349, "learning_rate": 6.2216640584433726e-06, "loss": 0.6122645139694214, "step": 3646 }, { "epoch": 1.2851101321585903, "grad_norm": 1.8312838317562172, "learning_rate": 6.2162701944081295e-06, "loss": 0.5838489532470703, "step": 3647 }, { "epoch": 1.2854625550660792, "grad_norm": 1.5533740438356287, "learning_rate": 6.2108776148314005e-06, "loss": 0.6020689606666565, "step": 3648 }, { "epoch": 1.2858149779735684, "grad_norm": 1.9453055966993607, "learning_rate": 6.205486321543798e-06, "loss": 0.5852698683738708, "step": 3649 }, { "epoch": 1.2861674008810573, "grad_norm": 1.7649785944212673, "learning_rate": 6.2000963163755015e-06, "loss": 0.560903012752533, "step": 3650 }, { "epoch": 1.2865198237885462, "grad_norm": 2.053972717306982, "learning_rate": 6.194707601156249e-06, "loss": 0.7750356197357178, "step": 3651 }, { "epoch": 1.2868722466960352, "grad_norm": 1.7842589241914402, "learning_rate": 6.189320177715338e-06, "loss": 0.5503605604171753, "step": 3652 }, { "epoch": 1.2872246696035243, "grad_norm": 1.8162609150425584, "learning_rate": 6.183934047881636e-06, "loss": 0.6910672187805176, "step": 3653 }, { "epoch": 1.2875770925110133, "grad_norm": 1.6952370527492193, "learning_rate": 6.1785492134835626e-06, "loss": 0.7773069739341736, "step": 3654 }, { "epoch": 1.2879295154185022, "grad_norm": 1.7765631560225321, "learning_rate": 6.173165676349103e-06, "loss": 0.6777454018592834, "step": 3655 }, { "epoch": 1.2882819383259911, "grad_norm": 1.6097825614884171, "learning_rate": 6.167783438305803e-06, "loss": 0.6103118658065796, "step": 3656 }, { "epoch": 1.28863436123348, "grad_norm": 2.4016366240266454, "learning_rate": 6.1624025011807595e-06, "loss": 0.593717634677887, "step": 3657 }, { "epoch": 1.2889867841409692, "grad_norm": 1.700445284940488, "learning_rate": 6.1570228668006395e-06, "loss": 0.5822824835777283, "step": 3658 }, { "epoch": 1.2893392070484582, "grad_norm": 1.7095957018221146, "learning_rate": 6.151644536991656e-06, "loss": 0.5180603861808777, "step": 3659 }, { "epoch": 1.289691629955947, "grad_norm": 1.799926440179644, "learning_rate": 6.14626751357959e-06, "loss": 0.6283069849014282, "step": 3660 }, { "epoch": 1.290044052863436, "grad_norm": 2.2706339647511613, "learning_rate": 6.14089179838977e-06, "loss": 0.7590633630752563, "step": 3661 }, { "epoch": 1.2903964757709252, "grad_norm": 1.4238309589699358, "learning_rate": 6.135517393247081e-06, "loss": 0.6044079661369324, "step": 3662 }, { "epoch": 1.2907488986784141, "grad_norm": 2.078820338247561, "learning_rate": 6.130144299975973e-06, "loss": 0.603421688079834, "step": 3663 }, { "epoch": 1.291101321585903, "grad_norm": 1.9398452395479244, "learning_rate": 6.1247725204004395e-06, "loss": 0.577094554901123, "step": 3664 }, { "epoch": 1.291453744493392, "grad_norm": 1.7780187513951604, "learning_rate": 6.119402056344033e-06, "loss": 0.5752004981040955, "step": 3665 }, { "epoch": 1.291806167400881, "grad_norm": 1.6979532493457608, "learning_rate": 6.114032909629863e-06, "loss": 0.730962872505188, "step": 3666 }, { "epoch": 1.29215859030837, "grad_norm": 2.0386068832784465, "learning_rate": 6.108665082080578e-06, "loss": 0.5361749529838562, "step": 3667 }, { "epoch": 1.292511013215859, "grad_norm": 1.470729033877409, "learning_rate": 6.103298575518401e-06, "loss": 0.4841603636741638, "step": 3668 }, { "epoch": 1.292863436123348, "grad_norm": 1.706501413292354, "learning_rate": 6.097933391765087e-06, "loss": 0.6614999771118164, "step": 3669 }, { "epoch": 1.2932158590308371, "grad_norm": 1.6930402108862321, "learning_rate": 6.092569532641947e-06, "loss": 0.6088405847549438, "step": 3670 }, { "epoch": 1.293568281938326, "grad_norm": 1.9173247230823398, "learning_rate": 6.087206999969848e-06, "loss": 0.601859986782074, "step": 3671 }, { "epoch": 1.293920704845815, "grad_norm": 1.8019332247534052, "learning_rate": 6.081845795569204e-06, "loss": 0.5724194049835205, "step": 3672 }, { "epoch": 1.294273127753304, "grad_norm": 1.7101141845528827, "learning_rate": 6.07648592125997e-06, "loss": 0.7899144887924194, "step": 3673 }, { "epoch": 1.2946255506607929, "grad_norm": 1.8438581079047975, "learning_rate": 6.071127378861667e-06, "loss": 0.5778594017028809, "step": 3674 }, { "epoch": 1.2949779735682818, "grad_norm": 1.6768623613769682, "learning_rate": 6.065770170193342e-06, "loss": 0.6357566118240356, "step": 3675 }, { "epoch": 1.295330396475771, "grad_norm": 1.5951400768860937, "learning_rate": 6.0604142970736115e-06, "loss": 0.511436939239502, "step": 3676 }, { "epoch": 1.29568281938326, "grad_norm": 1.883542435313207, "learning_rate": 6.0550597613206205e-06, "loss": 0.6469998955726624, "step": 3677 }, { "epoch": 1.2960352422907488, "grad_norm": 1.5730405198836903, "learning_rate": 6.049706564752069e-06, "loss": 0.5724819898605347, "step": 3678 }, { "epoch": 1.296387665198238, "grad_norm": 1.5360587172523898, "learning_rate": 6.044354709185203e-06, "loss": 0.6567148566246033, "step": 3679 }, { "epoch": 1.296740088105727, "grad_norm": 1.8931575903206552, "learning_rate": 6.039004196436807e-06, "loss": 0.6694033145904541, "step": 3680 }, { "epoch": 1.2970925110132159, "grad_norm": 1.8190573258877898, "learning_rate": 6.033655028323215e-06, "loss": 0.5147275924682617, "step": 3681 }, { "epoch": 1.2974449339207048, "grad_norm": 2.0405860057138256, "learning_rate": 6.0283072066603075e-06, "loss": 0.5881609320640564, "step": 3682 }, { "epoch": 1.2977973568281937, "grad_norm": 1.7248898652229567, "learning_rate": 6.022960733263493e-06, "loss": 0.625927209854126, "step": 3683 }, { "epoch": 1.2981497797356827, "grad_norm": 1.8738096752650604, "learning_rate": 6.017615609947747e-06, "loss": 0.693459153175354, "step": 3684 }, { "epoch": 1.2985022026431718, "grad_norm": 1.6745028766810846, "learning_rate": 6.0122718385275615e-06, "loss": 0.5185744762420654, "step": 3685 }, { "epoch": 1.2988546255506608, "grad_norm": 1.7625922291600025, "learning_rate": 6.006929420816982e-06, "loss": 0.5153995752334595, "step": 3686 }, { "epoch": 1.2992070484581497, "grad_norm": 1.9617946738772851, "learning_rate": 6.001588358629598e-06, "loss": 0.5844067931175232, "step": 3687 }, { "epoch": 1.2995594713656389, "grad_norm": 1.7999387557140187, "learning_rate": 5.996248653778529e-06, "loss": 0.6021767854690552, "step": 3688 }, { "epoch": 1.2999118942731278, "grad_norm": 1.650868828635221, "learning_rate": 5.990910308076443e-06, "loss": 0.573150098323822, "step": 3689 }, { "epoch": 1.3002643171806167, "grad_norm": 1.8809065032795727, "learning_rate": 5.985573323335541e-06, "loss": 0.5125507116317749, "step": 3690 }, { "epoch": 1.3006167400881057, "grad_norm": 1.5884199689542184, "learning_rate": 5.980237701367556e-06, "loss": 0.541732668876648, "step": 3691 }, { "epoch": 1.3009691629955946, "grad_norm": 2.0151748973563577, "learning_rate": 5.974903443983778e-06, "loss": 0.66359543800354, "step": 3692 }, { "epoch": 1.3013215859030838, "grad_norm": 1.8831727632454829, "learning_rate": 5.969570552995014e-06, "loss": 0.6986300349235535, "step": 3693 }, { "epoch": 1.3016740088105727, "grad_norm": 2.0800644206104195, "learning_rate": 5.9642390302116125e-06, "loss": 0.6829022169113159, "step": 3694 }, { "epoch": 1.3020264317180616, "grad_norm": 1.9073088749861613, "learning_rate": 5.9589088774434655e-06, "loss": 0.5710464715957642, "step": 3695 }, { "epoch": 1.3023788546255506, "grad_norm": 1.8154393300824316, "learning_rate": 5.953580096499989e-06, "loss": 0.5604938268661499, "step": 3696 }, { "epoch": 1.3027312775330397, "grad_norm": 1.755426899711885, "learning_rate": 5.948252689190141e-06, "loss": 0.678723931312561, "step": 3697 }, { "epoch": 1.3030837004405287, "grad_norm": 1.8845664461665383, "learning_rate": 5.9429266573224145e-06, "loss": 0.6652591228485107, "step": 3698 }, { "epoch": 1.3034361233480176, "grad_norm": 1.8800654237619134, "learning_rate": 5.937602002704819e-06, "loss": 0.6141147017478943, "step": 3699 }, { "epoch": 1.3037885462555066, "grad_norm": 1.937561336880738, "learning_rate": 5.932278727144924e-06, "loss": 0.5260860919952393, "step": 3700 }, { "epoch": 1.3041409691629955, "grad_norm": 1.6945627397292862, "learning_rate": 5.926956832449806e-06, "loss": 0.464357852935791, "step": 3701 }, { "epoch": 1.3044933920704846, "grad_norm": 1.8301641414278105, "learning_rate": 5.921636320426085e-06, "loss": 0.6513686180114746, "step": 3702 }, { "epoch": 1.3048458149779736, "grad_norm": 1.7297134138158161, "learning_rate": 5.91631719287991e-06, "loss": 0.44547855854034424, "step": 3703 }, { "epoch": 1.3051982378854625, "grad_norm": 1.8572950621020996, "learning_rate": 5.910999451616959e-06, "loss": 0.714026153087616, "step": 3704 }, { "epoch": 1.3055506607929517, "grad_norm": 1.5164059156260825, "learning_rate": 5.90568309844244e-06, "loss": 0.48294252157211304, "step": 3705 }, { "epoch": 1.3059030837004406, "grad_norm": 2.0148835282111275, "learning_rate": 5.900368135161093e-06, "loss": 0.587759256362915, "step": 3706 }, { "epoch": 1.3062555066079296, "grad_norm": 1.7833437474608147, "learning_rate": 5.895054563577172e-06, "loss": 0.6251810789108276, "step": 3707 }, { "epoch": 1.3066079295154185, "grad_norm": 1.98023378159902, "learning_rate": 5.889742385494481e-06, "loss": 0.6488438844680786, "step": 3708 }, { "epoch": 1.3069603524229074, "grad_norm": 2.3062951128393325, "learning_rate": 5.8844316027163315e-06, "loss": 0.6682882308959961, "step": 3709 }, { "epoch": 1.3073127753303964, "grad_norm": 1.9459894886811675, "learning_rate": 5.879122217045573e-06, "loss": 0.6537875533103943, "step": 3710 }, { "epoch": 1.3076651982378855, "grad_norm": 1.994395753049965, "learning_rate": 5.873814230284576e-06, "loss": 0.6813541650772095, "step": 3711 }, { "epoch": 1.3080176211453745, "grad_norm": 2.002875607232805, "learning_rate": 5.868507644235233e-06, "loss": 0.6962395906448364, "step": 3712 }, { "epoch": 1.3083700440528634, "grad_norm": 1.8811127927416966, "learning_rate": 5.863202460698972e-06, "loss": 0.6872841119766235, "step": 3713 }, { "epoch": 1.3087224669603525, "grad_norm": 2.007681646131619, "learning_rate": 5.857898681476732e-06, "loss": 0.7200508117675781, "step": 3714 }, { "epoch": 1.3090748898678415, "grad_norm": 1.7850989505478374, "learning_rate": 5.852596308368982e-06, "loss": 0.6100003719329834, "step": 3715 }, { "epoch": 1.3094273127753304, "grad_norm": 1.962305695853223, "learning_rate": 5.847295343175714e-06, "loss": 0.7347345352172852, "step": 3716 }, { "epoch": 1.3097797356828194, "grad_norm": 1.8094012131106647, "learning_rate": 5.841995787696438e-06, "loss": 0.6955733895301819, "step": 3717 }, { "epoch": 1.3101321585903083, "grad_norm": 1.6497459626323396, "learning_rate": 5.836697643730193e-06, "loss": 0.5266987085342407, "step": 3718 }, { "epoch": 1.3104845814977972, "grad_norm": 1.7072540878561502, "learning_rate": 5.83140091307553e-06, "loss": 0.5978814363479614, "step": 3719 }, { "epoch": 1.3108370044052864, "grad_norm": 1.9008641546548906, "learning_rate": 5.826105597530526e-06, "loss": 0.608231782913208, "step": 3720 }, { "epoch": 1.3111894273127753, "grad_norm": 1.660571967924875, "learning_rate": 5.820811698892775e-06, "loss": 0.5834963321685791, "step": 3721 }, { "epoch": 1.3115418502202643, "grad_norm": 1.7715871926900555, "learning_rate": 5.8155192189593915e-06, "loss": 0.6675208806991577, "step": 3722 }, { "epoch": 1.3118942731277534, "grad_norm": 2.0125396897962156, "learning_rate": 5.810228159527003e-06, "loss": 0.655093789100647, "step": 3723 }, { "epoch": 1.3122466960352424, "grad_norm": 1.832975656309839, "learning_rate": 5.804938522391768e-06, "loss": 0.5658842921257019, "step": 3724 }, { "epoch": 1.3125991189427313, "grad_norm": 1.7484570770381627, "learning_rate": 5.799650309349348e-06, "loss": 0.4502618610858917, "step": 3725 }, { "epoch": 1.3129515418502202, "grad_norm": 1.6150871905896036, "learning_rate": 5.79436352219493e-06, "loss": 0.6165845394134521, "step": 3726 }, { "epoch": 1.3133039647577092, "grad_norm": 1.6734001609648903, "learning_rate": 5.7890781627232115e-06, "loss": 0.6315968036651611, "step": 3727 }, { "epoch": 1.313656387665198, "grad_norm": 1.5048326218576167, "learning_rate": 5.783794232728408e-06, "loss": 0.58831787109375, "step": 3728 }, { "epoch": 1.3140088105726873, "grad_norm": 1.7597864288310854, "learning_rate": 5.778511734004248e-06, "loss": 0.5056396722793579, "step": 3729 }, { "epoch": 1.3143612334801762, "grad_norm": 2.3417954571274753, "learning_rate": 5.773230668343978e-06, "loss": 0.5469251871109009, "step": 3730 }, { "epoch": 1.3147136563876651, "grad_norm": 1.768855633328091, "learning_rate": 5.76795103754035e-06, "loss": 0.7011934518814087, "step": 3731 }, { "epoch": 1.3150660792951543, "grad_norm": 1.574817644372446, "learning_rate": 5.762672843385643e-06, "loss": 0.7080543041229248, "step": 3732 }, { "epoch": 1.3154185022026432, "grad_norm": 1.7812689751161113, "learning_rate": 5.757396087671634e-06, "loss": 0.5180330276489258, "step": 3733 }, { "epoch": 1.3157709251101322, "grad_norm": 1.6465709022018649, "learning_rate": 5.75212077218962e-06, "loss": 0.5282220840454102, "step": 3734 }, { "epoch": 1.316123348017621, "grad_norm": 1.9100789844293367, "learning_rate": 5.746846898730403e-06, "loss": 0.7174440026283264, "step": 3735 }, { "epoch": 1.31647577092511, "grad_norm": 1.7156784573652895, "learning_rate": 5.7415744690843025e-06, "loss": 0.537194013595581, "step": 3736 }, { "epoch": 1.3168281938325992, "grad_norm": 1.714186482517803, "learning_rate": 5.7363034850411415e-06, "loss": 0.7514588832855225, "step": 3737 }, { "epoch": 1.3171806167400881, "grad_norm": 1.6138774970176952, "learning_rate": 5.731033948390252e-06, "loss": 0.601151704788208, "step": 3738 }, { "epoch": 1.317533039647577, "grad_norm": 1.9652638368208295, "learning_rate": 5.7257658609204865e-06, "loss": 0.6046192646026611, "step": 3739 }, { "epoch": 1.317885462555066, "grad_norm": 1.9909773544544114, "learning_rate": 5.720499224420196e-06, "loss": 0.5003835558891296, "step": 3740 }, { "epoch": 1.3182378854625552, "grad_norm": 2.7143275056165237, "learning_rate": 5.715234040677229e-06, "loss": 0.6251966953277588, "step": 3741 }, { "epoch": 1.318590308370044, "grad_norm": 1.9483642954012013, "learning_rate": 5.709970311478961e-06, "loss": 0.6681240797042847, "step": 3742 }, { "epoch": 1.318942731277533, "grad_norm": 1.6278748497204938, "learning_rate": 5.704708038612261e-06, "loss": 0.582561194896698, "step": 3743 }, { "epoch": 1.319295154185022, "grad_norm": 1.8550137845260724, "learning_rate": 5.699447223863508e-06, "loss": 0.5616302490234375, "step": 3744 }, { "epoch": 1.319647577092511, "grad_norm": 1.7452561285826282, "learning_rate": 5.6941878690185835e-06, "loss": 0.6131408214569092, "step": 3745 }, { "epoch": 1.32, "grad_norm": 1.8334584062109562, "learning_rate": 5.688929975862873e-06, "loss": 0.5772547721862793, "step": 3746 }, { "epoch": 1.320352422907489, "grad_norm": 1.7519534139582256, "learning_rate": 5.683673546181274e-06, "loss": 0.5927203893661499, "step": 3747 }, { "epoch": 1.320704845814978, "grad_norm": 1.9849489030223588, "learning_rate": 5.67841858175818e-06, "loss": 0.6001334190368652, "step": 3748 }, { "epoch": 1.321057268722467, "grad_norm": 1.584893703676267, "learning_rate": 5.673165084377479e-06, "loss": 0.4598100781440735, "step": 3749 }, { "epoch": 1.321409691629956, "grad_norm": 1.9316178856088813, "learning_rate": 5.667913055822578e-06, "loss": 0.6455222368240356, "step": 3750 }, { "epoch": 1.321762114537445, "grad_norm": 1.9234057001448424, "learning_rate": 5.662662497876375e-06, "loss": 0.6327164173126221, "step": 3751 }, { "epoch": 1.322114537444934, "grad_norm": 1.7096288638222439, "learning_rate": 5.657413412321271e-06, "loss": 0.6699539422988892, "step": 3752 }, { "epoch": 1.3224669603524228, "grad_norm": 2.0694083676949107, "learning_rate": 5.6521658009391676e-06, "loss": 0.7507830858230591, "step": 3753 }, { "epoch": 1.3228193832599118, "grad_norm": 1.7615687866950613, "learning_rate": 5.646919665511461e-06, "loss": 0.5164662003517151, "step": 3754 }, { "epoch": 1.323171806167401, "grad_norm": 2.267697288539615, "learning_rate": 5.641675007819058e-06, "loss": 0.7059702277183533, "step": 3755 }, { "epoch": 1.3235242290748899, "grad_norm": 2.1165471311290243, "learning_rate": 5.636431829642359e-06, "loss": 0.6535515189170837, "step": 3756 }, { "epoch": 1.3238766519823788, "grad_norm": 1.782117402624855, "learning_rate": 5.631190132761247e-06, "loss": 0.5912176370620728, "step": 3757 }, { "epoch": 1.324229074889868, "grad_norm": 1.6111457739999588, "learning_rate": 5.625949918955126e-06, "loss": 0.6527940034866333, "step": 3758 }, { "epoch": 1.324581497797357, "grad_norm": 1.9751426120017839, "learning_rate": 5.620711190002879e-06, "loss": 0.7236875295639038, "step": 3759 }, { "epoch": 1.3249339207048458, "grad_norm": 2.042390900324052, "learning_rate": 5.6154739476829e-06, "loss": 0.6823146343231201, "step": 3760 }, { "epoch": 1.3252863436123348, "grad_norm": 2.058457581887865, "learning_rate": 5.610238193773061e-06, "loss": 0.5795537233352661, "step": 3761 }, { "epoch": 1.3256387665198237, "grad_norm": 1.90461931046175, "learning_rate": 5.605003930050738e-06, "loss": 0.5530939102172852, "step": 3762 }, { "epoch": 1.3259911894273126, "grad_norm": 1.6978922894801083, "learning_rate": 5.599771158292806e-06, "loss": 0.5362278819084167, "step": 3763 }, { "epoch": 1.3263436123348018, "grad_norm": 1.9521190182519916, "learning_rate": 5.5945398802756315e-06, "loss": 0.6136768460273743, "step": 3764 }, { "epoch": 1.3266960352422907, "grad_norm": 1.7782753118174626, "learning_rate": 5.589310097775055e-06, "loss": 0.5979033708572388, "step": 3765 }, { "epoch": 1.3270484581497797, "grad_norm": 1.810593191069574, "learning_rate": 5.584081812566439e-06, "loss": 0.6750006675720215, "step": 3766 }, { "epoch": 1.3274008810572688, "grad_norm": 1.6815578779160076, "learning_rate": 5.578855026424619e-06, "loss": 0.6004951000213623, "step": 3767 }, { "epoch": 1.3277533039647578, "grad_norm": 1.522422246822047, "learning_rate": 5.573629741123926e-06, "loss": 0.570702075958252, "step": 3768 }, { "epoch": 1.3281057268722467, "grad_norm": 1.5435622334320813, "learning_rate": 5.5684059584381826e-06, "loss": 0.506945788860321, "step": 3769 }, { "epoch": 1.3284581497797356, "grad_norm": 1.647967795112189, "learning_rate": 5.563183680140696e-06, "loss": 0.5935436487197876, "step": 3770 }, { "epoch": 1.3288105726872246, "grad_norm": 2.7715355389110043, "learning_rate": 5.5579629080042755e-06, "loss": 0.641446590423584, "step": 3771 }, { "epoch": 1.3291629955947137, "grad_norm": 1.7489195207611605, "learning_rate": 5.552743643801209e-06, "loss": 0.5816437005996704, "step": 3772 }, { "epoch": 1.3295154185022027, "grad_norm": 1.7699530777692443, "learning_rate": 5.547525889303265e-06, "loss": 0.666487991809845, "step": 3773 }, { "epoch": 1.3298678414096916, "grad_norm": 2.100750588167558, "learning_rate": 5.542309646281718e-06, "loss": 0.7961397767066956, "step": 3774 }, { "epoch": 1.3302202643171805, "grad_norm": 1.5292695888779975, "learning_rate": 5.53709491650732e-06, "loss": 0.4736033082008362, "step": 3775 }, { "epoch": 1.3305726872246697, "grad_norm": 1.8004482810288622, "learning_rate": 5.531881701750304e-06, "loss": 0.542208194732666, "step": 3776 }, { "epoch": 1.3309251101321586, "grad_norm": 1.8151751535940353, "learning_rate": 5.526670003780399e-06, "loss": 0.6306429505348206, "step": 3777 }, { "epoch": 1.3312775330396476, "grad_norm": 1.7520809852323194, "learning_rate": 5.521459824366808e-06, "loss": 0.531991720199585, "step": 3778 }, { "epoch": 1.3316299559471365, "grad_norm": 1.9852873895231067, "learning_rate": 5.516251165278235e-06, "loss": 0.688262939453125, "step": 3779 }, { "epoch": 1.3319823788546254, "grad_norm": 2.0026356133489416, "learning_rate": 5.511044028282853e-06, "loss": 0.7555293440818787, "step": 3780 }, { "epoch": 1.3323348017621146, "grad_norm": 1.9387490035628434, "learning_rate": 5.505838415148317e-06, "loss": 0.7518796324729919, "step": 3781 }, { "epoch": 1.3326872246696035, "grad_norm": 1.859399241253671, "learning_rate": 5.500634327641777e-06, "loss": 0.5161253809928894, "step": 3782 }, { "epoch": 1.3330396475770925, "grad_norm": 1.5897606830745852, "learning_rate": 5.4954317675298586e-06, "loss": 0.5617681741714478, "step": 3783 }, { "epoch": 1.3333920704845814, "grad_norm": 1.6894758792140483, "learning_rate": 5.4902307365786676e-06, "loss": 0.5707885026931763, "step": 3784 }, { "epoch": 1.3337444933920706, "grad_norm": 1.9016603426520955, "learning_rate": 5.485031236553792e-06, "loss": 0.5842025876045227, "step": 3785 }, { "epoch": 1.3340969162995595, "grad_norm": 2.278549510271659, "learning_rate": 5.479833269220296e-06, "loss": 0.7103949785232544, "step": 3786 }, { "epoch": 1.3344493392070484, "grad_norm": 1.8432428404869632, "learning_rate": 5.474636836342737e-06, "loss": 0.7704740762710571, "step": 3787 }, { "epoch": 1.3348017621145374, "grad_norm": 1.808727631247744, "learning_rate": 5.469441939685137e-06, "loss": 0.6402652263641357, "step": 3788 }, { "epoch": 1.3351541850220263, "grad_norm": 1.892219877227891, "learning_rate": 5.464248581011002e-06, "loss": 0.8214348554611206, "step": 3789 }, { "epoch": 1.3355066079295155, "grad_norm": 1.9758909531924576, "learning_rate": 5.459056762083318e-06, "loss": 0.6372429132461548, "step": 3790 }, { "epoch": 1.3358590308370044, "grad_norm": 1.849044346394621, "learning_rate": 5.453866484664543e-06, "loss": 0.5418422222137451, "step": 3791 }, { "epoch": 1.3362114537444933, "grad_norm": 1.7395663492002502, "learning_rate": 5.448677750516613e-06, "loss": 0.6574567556381226, "step": 3792 }, { "epoch": 1.3365638766519825, "grad_norm": 1.9976311809706857, "learning_rate": 5.443490561400948e-06, "loss": 0.5174030661582947, "step": 3793 }, { "epoch": 1.3369162995594714, "grad_norm": 1.5627335899600845, "learning_rate": 5.4383049190784275e-06, "loss": 0.595477819442749, "step": 3794 }, { "epoch": 1.3372687224669604, "grad_norm": 1.845680624563864, "learning_rate": 5.4331208253094255e-06, "loss": 0.6177364587783813, "step": 3795 }, { "epoch": 1.3376211453744493, "grad_norm": 1.6348460055259042, "learning_rate": 5.4279382818537774e-06, "loss": 0.6106897592544556, "step": 3796 }, { "epoch": 1.3379735682819383, "grad_norm": 1.8500671496295353, "learning_rate": 5.422757290470795e-06, "loss": 0.46700483560562134, "step": 3797 }, { "epoch": 1.3383259911894272, "grad_norm": 1.952200717602712, "learning_rate": 5.417577852919262e-06, "loss": 0.5408231019973755, "step": 3798 }, { "epoch": 1.3386784140969163, "grad_norm": 1.8733329229880296, "learning_rate": 5.412399970957439e-06, "loss": 0.6430809497833252, "step": 3799 }, { "epoch": 1.3390308370044053, "grad_norm": 1.9515663922431925, "learning_rate": 5.4072236463430535e-06, "loss": 0.6817858219146729, "step": 3800 }, { "epoch": 1.3393832599118942, "grad_norm": 1.7386331074635664, "learning_rate": 5.402048880833308e-06, "loss": 0.5492604970932007, "step": 3801 }, { "epoch": 1.3397356828193834, "grad_norm": 1.9883458715986422, "learning_rate": 5.39687567618487e-06, "loss": 0.6148543357849121, "step": 3802 }, { "epoch": 1.3400881057268723, "grad_norm": 1.7245960691315507, "learning_rate": 5.391704034153894e-06, "loss": 0.5921820402145386, "step": 3803 }, { "epoch": 1.3404405286343613, "grad_norm": 1.8759210914719033, "learning_rate": 5.386533956495974e-06, "loss": 0.49728113412857056, "step": 3804 }, { "epoch": 1.3407929515418502, "grad_norm": 1.7899218455267007, "learning_rate": 5.381365444966205e-06, "loss": 0.5944808125495911, "step": 3805 }, { "epoch": 1.3411453744493391, "grad_norm": 1.6022996204023598, "learning_rate": 5.376198501319128e-06, "loss": 0.5197580456733704, "step": 3806 }, { "epoch": 1.341497797356828, "grad_norm": 1.5953524266203611, "learning_rate": 5.3710331273087625e-06, "loss": 0.6229256391525269, "step": 3807 }, { "epoch": 1.3418502202643172, "grad_norm": 2.0736813734241073, "learning_rate": 5.365869324688591e-06, "loss": 0.5305753946304321, "step": 3808 }, { "epoch": 1.3422026431718062, "grad_norm": 1.4520191291543518, "learning_rate": 5.360707095211566e-06, "loss": 0.4002259373664856, "step": 3809 }, { "epoch": 1.342555066079295, "grad_norm": 1.8821320745162777, "learning_rate": 5.3555464406300965e-06, "loss": 0.5211426615715027, "step": 3810 }, { "epoch": 1.3429074889867842, "grad_norm": 1.7112007743194535, "learning_rate": 5.350387362696077e-06, "loss": 0.5998013019561768, "step": 3811 }, { "epoch": 1.3432599118942732, "grad_norm": 1.6128635046491597, "learning_rate": 5.345229863160839e-06, "loss": 0.5330953598022461, "step": 3812 }, { "epoch": 1.3436123348017621, "grad_norm": 1.6570398271033384, "learning_rate": 5.340073943775206e-06, "loss": 0.6999118328094482, "step": 3813 }, { "epoch": 1.343964757709251, "grad_norm": 2.060346240780723, "learning_rate": 5.334919606289446e-06, "loss": 0.6286367177963257, "step": 3814 }, { "epoch": 1.34431718061674, "grad_norm": 1.4130805934733843, "learning_rate": 5.329766852453296e-06, "loss": 0.5793008804321289, "step": 3815 }, { "epoch": 1.3446696035242292, "grad_norm": 1.7815340287164039, "learning_rate": 5.324615684015957e-06, "loss": 0.5811383128166199, "step": 3816 }, { "epoch": 1.345022026431718, "grad_norm": 1.8888368809882845, "learning_rate": 5.319466102726087e-06, "loss": 0.7389675378799438, "step": 3817 }, { "epoch": 1.345374449339207, "grad_norm": 1.9482215135863048, "learning_rate": 5.314318110331815e-06, "loss": 0.6105868220329285, "step": 3818 }, { "epoch": 1.345726872246696, "grad_norm": 1.648111237588601, "learning_rate": 5.3091717085807235e-06, "loss": 0.5979465842247009, "step": 3819 }, { "epoch": 1.3460792951541851, "grad_norm": 2.100772248921902, "learning_rate": 5.304026899219846e-06, "loss": 0.6722681522369385, "step": 3820 }, { "epoch": 1.346431718061674, "grad_norm": 1.5469717835195365, "learning_rate": 5.298883683995697e-06, "loss": 0.4687497913837433, "step": 3821 }, { "epoch": 1.346784140969163, "grad_norm": 1.6982574361909266, "learning_rate": 5.29374206465423e-06, "loss": 0.563692569732666, "step": 3822 }, { "epoch": 1.347136563876652, "grad_norm": 1.7298606992172854, "learning_rate": 5.2886020429408716e-06, "loss": 0.604897141456604, "step": 3823 }, { "epoch": 1.3474889867841409, "grad_norm": 2.111770720101543, "learning_rate": 5.283463620600493e-06, "loss": 0.6270164251327515, "step": 3824 }, { "epoch": 1.34784140969163, "grad_norm": 2.1238324371472954, "learning_rate": 5.278326799377428e-06, "loss": 0.6487830877304077, "step": 3825 }, { "epoch": 1.348193832599119, "grad_norm": 1.58718768900561, "learning_rate": 5.273191581015474e-06, "loss": 0.5816935896873474, "step": 3826 }, { "epoch": 1.348546255506608, "grad_norm": 1.72099904065486, "learning_rate": 5.26805796725788e-06, "loss": 0.6281115412712097, "step": 3827 }, { "epoch": 1.348898678414097, "grad_norm": 2.0975447662151288, "learning_rate": 5.2629259598473335e-06, "loss": 0.5031973123550415, "step": 3828 }, { "epoch": 1.349251101321586, "grad_norm": 1.6391975654545219, "learning_rate": 5.257795560526005e-06, "loss": 0.6220165491104126, "step": 3829 }, { "epoch": 1.349603524229075, "grad_norm": 1.8177506583957952, "learning_rate": 5.2526667710354995e-06, "loss": 0.6451058387756348, "step": 3830 }, { "epoch": 1.3499559471365639, "grad_norm": 2.000132155225934, "learning_rate": 5.247539593116884e-06, "loss": 0.7524863481521606, "step": 3831 }, { "epoch": 1.3503083700440528, "grad_norm": 1.7855711080776688, "learning_rate": 5.242414028510674e-06, "loss": 0.6270921230316162, "step": 3832 }, { "epoch": 1.3506607929515417, "grad_norm": 1.8779302666662292, "learning_rate": 5.237290078956836e-06, "loss": 0.6196550130844116, "step": 3833 }, { "epoch": 1.351013215859031, "grad_norm": 1.932517845360487, "learning_rate": 5.232167746194798e-06, "loss": 0.8512230515480042, "step": 3834 }, { "epoch": 1.3513656387665198, "grad_norm": 1.672868645098828, "learning_rate": 5.227047031963435e-06, "loss": 0.5196807980537415, "step": 3835 }, { "epoch": 1.3517180616740088, "grad_norm": 1.890472281368116, "learning_rate": 5.2219279380010565e-06, "loss": 0.6713111400604248, "step": 3836 }, { "epoch": 1.352070484581498, "grad_norm": 1.8891048300322977, "learning_rate": 5.216810466045448e-06, "loss": 0.7150874137878418, "step": 3837 }, { "epoch": 1.3524229074889869, "grad_norm": 1.9379344809365882, "learning_rate": 5.211694617833827e-06, "loss": 0.5812375545501709, "step": 3838 }, { "epoch": 1.3527753303964758, "grad_norm": 1.6232111313971074, "learning_rate": 5.2065803951028675e-06, "loss": 0.5842182040214539, "step": 3839 }, { "epoch": 1.3531277533039647, "grad_norm": 1.7655789614212678, "learning_rate": 5.201467799588685e-06, "loss": 0.5432665348052979, "step": 3840 }, { "epoch": 1.3534801762114537, "grad_norm": 1.610757257105171, "learning_rate": 5.196356833026845e-06, "loss": 0.551771879196167, "step": 3841 }, { "epoch": 1.3538325991189426, "grad_norm": 2.0105503681662076, "learning_rate": 5.19124749715237e-06, "loss": 0.6961710453033447, "step": 3842 }, { "epoch": 1.3541850220264318, "grad_norm": 1.9510922019810755, "learning_rate": 5.18613979369972e-06, "loss": 0.7105714678764343, "step": 3843 }, { "epoch": 1.3545374449339207, "grad_norm": 1.9369232024679732, "learning_rate": 5.181033724402789e-06, "loss": 0.7100229263305664, "step": 3844 }, { "epoch": 1.3548898678414096, "grad_norm": 1.6852711649451124, "learning_rate": 5.175929290994941e-06, "loss": 0.651812732219696, "step": 3845 }, { "epoch": 1.3552422907488988, "grad_norm": 2.308449923325572, "learning_rate": 5.170826495208967e-06, "loss": 0.5194147825241089, "step": 3846 }, { "epoch": 1.3555947136563877, "grad_norm": 1.6095794520986102, "learning_rate": 5.16572533877711e-06, "loss": 0.5939956307411194, "step": 3847 }, { "epoch": 1.3559471365638767, "grad_norm": 1.7731843322868706, "learning_rate": 5.160625823431051e-06, "loss": 0.6434104442596436, "step": 3848 }, { "epoch": 1.3562995594713656, "grad_norm": 1.9584483919337772, "learning_rate": 5.155527950901914e-06, "loss": 0.5256108045578003, "step": 3849 }, { "epoch": 1.3566519823788545, "grad_norm": 1.5746637659323357, "learning_rate": 5.150431722920277e-06, "loss": 0.5632717609405518, "step": 3850 }, { "epoch": 1.3570044052863435, "grad_norm": 1.8450205582439452, "learning_rate": 5.145337141216149e-06, "loss": 0.5964382886886597, "step": 3851 }, { "epoch": 1.3573568281938326, "grad_norm": 1.9383063853676261, "learning_rate": 5.140244207518971e-06, "loss": 0.7268366813659668, "step": 3852 }, { "epoch": 1.3577092511013216, "grad_norm": 2.357958765027834, "learning_rate": 5.135152923557647e-06, "loss": 0.7376477122306824, "step": 3853 }, { "epoch": 1.3580616740088105, "grad_norm": 1.9573550951394243, "learning_rate": 5.130063291060505e-06, "loss": 0.50569748878479, "step": 3854 }, { "epoch": 1.3584140969162997, "grad_norm": 1.684535591269265, "learning_rate": 5.12497531175532e-06, "loss": 0.5639374256134033, "step": 3855 }, { "epoch": 1.3587665198237886, "grad_norm": 2.0009335012534146, "learning_rate": 5.1198889873692994e-06, "loss": 0.5051915645599365, "step": 3856 }, { "epoch": 1.3591189427312775, "grad_norm": 1.979939818228197, "learning_rate": 5.114804319629088e-06, "loss": 0.4718795120716095, "step": 3857 }, { "epoch": 1.3594713656387665, "grad_norm": 1.7040447839749338, "learning_rate": 5.109721310260781e-06, "loss": 0.5684067606925964, "step": 3858 }, { "epoch": 1.3598237885462554, "grad_norm": 1.687205926430453, "learning_rate": 5.104639960989903e-06, "loss": 0.5757609605789185, "step": 3859 }, { "epoch": 1.3601762114537446, "grad_norm": 1.637859976815221, "learning_rate": 5.099560273541401e-06, "loss": 0.5971167087554932, "step": 3860 }, { "epoch": 1.3605286343612335, "grad_norm": 1.9766573766085018, "learning_rate": 5.094482249639683e-06, "loss": 0.6959896683692932, "step": 3861 }, { "epoch": 1.3608810572687224, "grad_norm": 1.8397057454745067, "learning_rate": 5.089405891008574e-06, "loss": 0.6954548358917236, "step": 3862 }, { "epoch": 1.3612334801762114, "grad_norm": 1.5747472561310782, "learning_rate": 5.084331199371343e-06, "loss": 0.5659986138343811, "step": 3863 }, { "epoch": 1.3615859030837005, "grad_norm": 1.9340659365358734, "learning_rate": 5.079258176450687e-06, "loss": 0.5582559108734131, "step": 3864 }, { "epoch": 1.3619383259911895, "grad_norm": 1.5684621947501252, "learning_rate": 5.0741868239687395e-06, "loss": 0.5337075591087341, "step": 3865 }, { "epoch": 1.3622907488986784, "grad_norm": 1.8617666338346237, "learning_rate": 5.069117143647075e-06, "loss": 0.621441125869751, "step": 3866 }, { "epoch": 1.3626431718061673, "grad_norm": 1.7285404952370873, "learning_rate": 5.064049137206677e-06, "loss": 0.5476670861244202, "step": 3867 }, { "epoch": 1.3629955947136563, "grad_norm": 1.9444577342582248, "learning_rate": 5.058982806367989e-06, "loss": 0.5357356071472168, "step": 3868 }, { "epoch": 1.3633480176211454, "grad_norm": 2.032867685216442, "learning_rate": 5.053918152850868e-06, "loss": 0.5722761750221252, "step": 3869 }, { "epoch": 1.3637004405286344, "grad_norm": 1.8019521015311857, "learning_rate": 5.048855178374606e-06, "loss": 0.7271207571029663, "step": 3870 }, { "epoch": 1.3640528634361233, "grad_norm": 2.149716528128109, "learning_rate": 5.043793884657926e-06, "loss": 0.6213557720184326, "step": 3871 }, { "epoch": 1.3644052863436125, "grad_norm": 1.9750542918701046, "learning_rate": 5.03873427341898e-06, "loss": 0.6509476900100708, "step": 3872 }, { "epoch": 1.3647577092511014, "grad_norm": 1.8266690493980986, "learning_rate": 5.0336763463753425e-06, "loss": 0.5321642756462097, "step": 3873 }, { "epoch": 1.3651101321585903, "grad_norm": 1.8114804761469812, "learning_rate": 5.028620105244035e-06, "loss": 0.7237476110458374, "step": 3874 }, { "epoch": 1.3654625550660793, "grad_norm": 2.014453779183698, "learning_rate": 5.0235655517414805e-06, "loss": 0.6653447151184082, "step": 3875 }, { "epoch": 1.3658149779735682, "grad_norm": 1.843622237552059, "learning_rate": 5.018512687583552e-06, "loss": 0.6188938617706299, "step": 3876 }, { "epoch": 1.3661674008810571, "grad_norm": 1.8211870806299153, "learning_rate": 5.013461514485536e-06, "loss": 0.6341606378555298, "step": 3877 }, { "epoch": 1.3665198237885463, "grad_norm": 1.6224290182707664, "learning_rate": 5.00841203416215e-06, "loss": 0.6148994565010071, "step": 3878 }, { "epoch": 1.3668722466960352, "grad_norm": 1.8692541577175399, "learning_rate": 5.003364248327533e-06, "loss": 0.6292222142219543, "step": 3879 }, { "epoch": 1.3672246696035242, "grad_norm": 1.618170468267519, "learning_rate": 4.998318158695255e-06, "loss": 0.6648836135864258, "step": 3880 }, { "epoch": 1.3675770925110133, "grad_norm": 6.866040476375875, "learning_rate": 4.993273766978297e-06, "loss": 0.5175273418426514, "step": 3881 }, { "epoch": 1.3679295154185023, "grad_norm": 1.5661461645683938, "learning_rate": 4.98823107488909e-06, "loss": 0.5686253309249878, "step": 3882 }, { "epoch": 1.3682819383259912, "grad_norm": 1.9697672783538545, "learning_rate": 4.983190084139452e-06, "loss": 0.6128156185150146, "step": 3883 }, { "epoch": 1.3686343612334801, "grad_norm": 1.9331016188284555, "learning_rate": 4.978150796440656e-06, "loss": 0.6849625110626221, "step": 3884 }, { "epoch": 1.368986784140969, "grad_norm": 1.5986771035358114, "learning_rate": 4.973113213503379e-06, "loss": 0.5735955238342285, "step": 3885 }, { "epoch": 1.369339207048458, "grad_norm": 1.6049593584012303, "learning_rate": 4.968077337037724e-06, "loss": 0.4584425091743469, "step": 3886 }, { "epoch": 1.3696916299559472, "grad_norm": 1.9525312670752564, "learning_rate": 4.963043168753212e-06, "loss": 0.547109067440033, "step": 3887 }, { "epoch": 1.3700440528634361, "grad_norm": 2.113357180829694, "learning_rate": 4.9580107103587895e-06, "loss": 0.6966128349304199, "step": 3888 }, { "epoch": 1.370396475770925, "grad_norm": 1.7817002019358994, "learning_rate": 4.952979963562814e-06, "loss": 0.6275819540023804, "step": 3889 }, { "epoch": 1.3707488986784142, "grad_norm": 1.6096829752005641, "learning_rate": 4.94795093007308e-06, "loss": 0.5678467750549316, "step": 3890 }, { "epoch": 1.3711013215859031, "grad_norm": 1.8874234747665013, "learning_rate": 4.942923611596772e-06, "loss": 0.6516115665435791, "step": 3891 }, { "epoch": 1.371453744493392, "grad_norm": 1.8638529672264463, "learning_rate": 4.937898009840518e-06, "loss": 0.6279621124267578, "step": 3892 }, { "epoch": 1.371806167400881, "grad_norm": 1.6187117518672614, "learning_rate": 4.932874126510353e-06, "loss": 0.6123322248458862, "step": 3893 }, { "epoch": 1.37215859030837, "grad_norm": 1.6259761787603553, "learning_rate": 4.927851963311726e-06, "loss": 0.43412432074546814, "step": 3894 }, { "epoch": 1.372511013215859, "grad_norm": 1.859998329311036, "learning_rate": 4.922831521949507e-06, "loss": 0.6582022905349731, "step": 3895 }, { "epoch": 1.372863436123348, "grad_norm": 1.8966645456702385, "learning_rate": 4.917812804127976e-06, "loss": 0.6219466328620911, "step": 3896 }, { "epoch": 1.373215859030837, "grad_norm": 2.056798959647299, "learning_rate": 4.9127958115508365e-06, "loss": 0.5352981090545654, "step": 3897 }, { "epoch": 1.373568281938326, "grad_norm": 1.5240218181276974, "learning_rate": 4.907780545921205e-06, "loss": 0.47646182775497437, "step": 3898 }, { "epoch": 1.373920704845815, "grad_norm": 1.6949945802187276, "learning_rate": 4.902767008941594e-06, "loss": 0.5335453748703003, "step": 3899 }, { "epoch": 1.374273127753304, "grad_norm": 1.7931951401372748, "learning_rate": 4.897755202313954e-06, "loss": 0.576435923576355, "step": 3900 }, { "epoch": 1.374625550660793, "grad_norm": 1.6675338707159029, "learning_rate": 4.8927451277396365e-06, "loss": 0.533431887626648, "step": 3901 }, { "epoch": 1.3749779735682819, "grad_norm": 1.7439550653197133, "learning_rate": 4.8877367869194035e-06, "loss": 0.6892110109329224, "step": 3902 }, { "epoch": 1.3753303964757708, "grad_norm": 1.9209875137364842, "learning_rate": 4.8827301815534335e-06, "loss": 0.7028052806854248, "step": 3903 }, { "epoch": 1.37568281938326, "grad_norm": 1.8413166797931897, "learning_rate": 4.877725313341306e-06, "loss": 0.6883414387702942, "step": 3904 }, { "epoch": 1.376035242290749, "grad_norm": 2.145518516472349, "learning_rate": 4.8727221839820285e-06, "loss": 0.6712944507598877, "step": 3905 }, { "epoch": 1.3763876651982379, "grad_norm": 1.6297297090329885, "learning_rate": 4.867720795174006e-06, "loss": 0.6139085292816162, "step": 3906 }, { "epoch": 1.3767400881057268, "grad_norm": 1.8425831405666082, "learning_rate": 4.862721148615043e-06, "loss": 0.6463953256607056, "step": 3907 }, { "epoch": 1.377092511013216, "grad_norm": 1.768461759599311, "learning_rate": 4.857723246002376e-06, "loss": 0.6790587306022644, "step": 3908 }, { "epoch": 1.3774449339207049, "grad_norm": 1.7177146369820009, "learning_rate": 4.852727089032634e-06, "loss": 0.4996854066848755, "step": 3909 }, { "epoch": 1.3777973568281938, "grad_norm": 1.8098347886488457, "learning_rate": 4.847732679401855e-06, "loss": 0.5826590061187744, "step": 3910 }, { "epoch": 1.3781497797356828, "grad_norm": 1.8997892974208295, "learning_rate": 4.842740018805489e-06, "loss": 0.5044558048248291, "step": 3911 }, { "epoch": 1.3785022026431717, "grad_norm": 1.873679943847948, "learning_rate": 4.837749108938381e-06, "loss": 0.49022918939590454, "step": 3912 }, { "epoch": 1.3788546255506609, "grad_norm": 1.9497488299017371, "learning_rate": 4.832759951494798e-06, "loss": 0.7034850120544434, "step": 3913 }, { "epoch": 1.3792070484581498, "grad_norm": 1.8582811393472771, "learning_rate": 4.827772548168408e-06, "loss": 0.5835636854171753, "step": 3914 }, { "epoch": 1.3795594713656387, "grad_norm": 1.8615896532434415, "learning_rate": 4.822786900652262e-06, "loss": 0.6000608205795288, "step": 3915 }, { "epoch": 1.3799118942731279, "grad_norm": 2.003742345218382, "learning_rate": 4.817803010638847e-06, "loss": 0.6121091842651367, "step": 3916 }, { "epoch": 1.3802643171806168, "grad_norm": 1.80308866184307, "learning_rate": 4.812820879820034e-06, "loss": 0.457197904586792, "step": 3917 }, { "epoch": 1.3806167400881058, "grad_norm": 1.8962611537179284, "learning_rate": 4.807840509887102e-06, "loss": 0.6495843529701233, "step": 3918 }, { "epoch": 1.3809691629955947, "grad_norm": 1.9212587769996015, "learning_rate": 4.80286190253073e-06, "loss": 0.6245059967041016, "step": 3919 }, { "epoch": 1.3813215859030836, "grad_norm": 2.020688644956673, "learning_rate": 4.797885059440998e-06, "loss": 0.5648606419563293, "step": 3920 }, { "epoch": 1.3816740088105726, "grad_norm": 1.93208096226899, "learning_rate": 4.7929099823073945e-06, "loss": 0.6593670845031738, "step": 3921 }, { "epoch": 1.3820264317180617, "grad_norm": 1.8973564890389945, "learning_rate": 4.787936672818807e-06, "loss": 0.6400346159934998, "step": 3922 }, { "epoch": 1.3823788546255507, "grad_norm": 1.8684904083901948, "learning_rate": 4.782965132663505e-06, "loss": 0.6042170524597168, "step": 3923 }, { "epoch": 1.3827312775330396, "grad_norm": 1.8230700495851246, "learning_rate": 4.777995363529184e-06, "loss": 0.6224586963653564, "step": 3924 }, { "epoch": 1.3830837004405288, "grad_norm": 2.09797321253942, "learning_rate": 4.7730273671029235e-06, "loss": 0.6944444179534912, "step": 3925 }, { "epoch": 1.3834361233480177, "grad_norm": 1.976613089140818, "learning_rate": 4.768061145071201e-06, "loss": 0.5871950387954712, "step": 3926 }, { "epoch": 1.3837885462555066, "grad_norm": 1.7713632438369786, "learning_rate": 4.763096699119897e-06, "loss": 0.6438909769058228, "step": 3927 }, { "epoch": 1.3841409691629956, "grad_norm": 1.6141008005869943, "learning_rate": 4.75813403093428e-06, "loss": 0.6338443756103516, "step": 3928 }, { "epoch": 1.3844933920704845, "grad_norm": 2.2680544531424753, "learning_rate": 4.753173142199036e-06, "loss": 0.6343874931335449, "step": 3929 }, { "epoch": 1.3848458149779734, "grad_norm": 1.7233771229601555, "learning_rate": 4.7482140345982174e-06, "loss": 0.5383629202842712, "step": 3930 }, { "epoch": 1.3851982378854626, "grad_norm": 1.8699549247596075, "learning_rate": 4.743256709815289e-06, "loss": 0.5365063548088074, "step": 3931 }, { "epoch": 1.3855506607929515, "grad_norm": 2.2583515376147694, "learning_rate": 4.738301169533116e-06, "loss": 0.6310757398605347, "step": 3932 }, { "epoch": 1.3859030837004405, "grad_norm": 2.1022070754037476, "learning_rate": 4.733347415433946e-06, "loss": 0.7609038949012756, "step": 3933 }, { "epoch": 1.3862555066079296, "grad_norm": 2.174490642392946, "learning_rate": 4.728395449199423e-06, "loss": 0.5837516784667969, "step": 3934 }, { "epoch": 1.3866079295154186, "grad_norm": 1.719340289699717, "learning_rate": 4.7234452725105875e-06, "loss": 0.6075407862663269, "step": 3935 }, { "epoch": 1.3869603524229075, "grad_norm": 1.7651152509667416, "learning_rate": 4.718496887047864e-06, "loss": 0.5246843099594116, "step": 3936 }, { "epoch": 1.3873127753303964, "grad_norm": 1.6874306455639787, "learning_rate": 4.713550294491091e-06, "loss": 0.6256884336471558, "step": 3937 }, { "epoch": 1.3876651982378854, "grad_norm": 1.632156841956259, "learning_rate": 4.708605496519467e-06, "loss": 0.5039727687835693, "step": 3938 }, { "epoch": 1.3880176211453745, "grad_norm": 2.0143508196146196, "learning_rate": 4.703662494811599e-06, "loss": 0.5302769541740417, "step": 3939 }, { "epoch": 1.3883700440528635, "grad_norm": 1.6358403288542849, "learning_rate": 4.698721291045491e-06, "loss": 0.654889702796936, "step": 3940 }, { "epoch": 1.3887224669603524, "grad_norm": 1.8724260838054423, "learning_rate": 4.693781886898521e-06, "loss": 0.5571156740188599, "step": 3941 }, { "epoch": 1.3890748898678413, "grad_norm": 1.8352093678478665, "learning_rate": 4.688844284047466e-06, "loss": 0.489155113697052, "step": 3942 }, { "epoch": 1.3894273127753305, "grad_norm": 2.3056906716340793, "learning_rate": 4.683908484168487e-06, "loss": 0.6422649621963501, "step": 3943 }, { "epoch": 1.3897797356828194, "grad_norm": 2.1056674936107345, "learning_rate": 4.67897448893713e-06, "loss": 0.6800041794776917, "step": 3944 }, { "epoch": 1.3901321585903084, "grad_norm": 1.9512416893069657, "learning_rate": 4.674042300028345e-06, "loss": 0.6091655492782593, "step": 3945 }, { "epoch": 1.3904845814977973, "grad_norm": 1.5832960247380383, "learning_rate": 4.669111919116442e-06, "loss": 0.6217864751815796, "step": 3946 }, { "epoch": 1.3908370044052862, "grad_norm": 1.9328669999328483, "learning_rate": 4.664183347875144e-06, "loss": 0.6140862703323364, "step": 3947 }, { "epoch": 1.3911894273127754, "grad_norm": 1.5467868836495022, "learning_rate": 4.659256587977542e-06, "loss": 0.5485835075378418, "step": 3948 }, { "epoch": 1.3915418502202643, "grad_norm": 1.9704789330010746, "learning_rate": 4.654331641096118e-06, "loss": 0.642849862575531, "step": 3949 }, { "epoch": 1.3918942731277533, "grad_norm": 3.421035640959237, "learning_rate": 4.649408508902739e-06, "loss": 0.7084407806396484, "step": 3950 }, { "epoch": 1.3922466960352424, "grad_norm": 1.780782004302536, "learning_rate": 4.644487193068653e-06, "loss": 0.4798510670661926, "step": 3951 }, { "epoch": 1.3925991189427314, "grad_norm": 2.0571809281532056, "learning_rate": 4.639567695264493e-06, "loss": 0.6350974440574646, "step": 3952 }, { "epoch": 1.3929515418502203, "grad_norm": 1.6636780012798107, "learning_rate": 4.634650017160285e-06, "loss": 0.6046940684318542, "step": 3953 }, { "epoch": 1.3933039647577092, "grad_norm": 1.8656342511774384, "learning_rate": 4.629734160425412e-06, "loss": 0.5262438058853149, "step": 3954 }, { "epoch": 1.3936563876651982, "grad_norm": 1.6602375526420536, "learning_rate": 4.6248201267286666e-06, "loss": 0.4836997985839844, "step": 3955 }, { "epoch": 1.394008810572687, "grad_norm": 1.8387545975251456, "learning_rate": 4.619907917738206e-06, "loss": 0.5491573810577393, "step": 3956 }, { "epoch": 1.3943612334801763, "grad_norm": 1.7103638500009937, "learning_rate": 4.614997535121574e-06, "loss": 0.5778772830963135, "step": 3957 }, { "epoch": 1.3947136563876652, "grad_norm": 1.886204345973439, "learning_rate": 4.61008898054569e-06, "loss": 0.6235651969909668, "step": 3958 }, { "epoch": 1.3950660792951541, "grad_norm": 1.533461324415723, "learning_rate": 4.605182255676857e-06, "loss": 0.5192956924438477, "step": 3959 }, { "epoch": 1.3954185022026433, "grad_norm": 1.6490801359766816, "learning_rate": 4.600277362180753e-06, "loss": 0.5652563571929932, "step": 3960 }, { "epoch": 1.3957709251101322, "grad_norm": 2.0491508628562594, "learning_rate": 4.595374301722445e-06, "loss": 0.6451884508132935, "step": 3961 }, { "epoch": 1.3961233480176212, "grad_norm": 1.6267669051180629, "learning_rate": 4.5904730759663555e-06, "loss": 0.6358006000518799, "step": 3962 }, { "epoch": 1.39647577092511, "grad_norm": 1.9868299068304147, "learning_rate": 4.5855736865763104e-06, "loss": 0.6122751832008362, "step": 3963 }, { "epoch": 1.396828193832599, "grad_norm": 1.6563994945684704, "learning_rate": 4.580676135215495e-06, "loss": 0.5563797354698181, "step": 3964 }, { "epoch": 1.397180616740088, "grad_norm": 1.7043306637307543, "learning_rate": 4.575780423546476e-06, "loss": 0.5915960669517517, "step": 3965 }, { "epoch": 1.3975330396475771, "grad_norm": 2.2793683384994363, "learning_rate": 4.570886553231196e-06, "loss": 0.5755159854888916, "step": 3966 }, { "epoch": 1.397885462555066, "grad_norm": 1.713166792254198, "learning_rate": 4.565994525930967e-06, "loss": 0.7017625570297241, "step": 3967 }, { "epoch": 1.398237885462555, "grad_norm": 1.901331269180062, "learning_rate": 4.5611043433064875e-06, "loss": 0.7623441815376282, "step": 3968 }, { "epoch": 1.3985903083700442, "grad_norm": 1.772343766995311, "learning_rate": 4.556216007017822e-06, "loss": 0.5561864376068115, "step": 3969 }, { "epoch": 1.398942731277533, "grad_norm": 1.7107369517825557, "learning_rate": 4.5513295187243975e-06, "loss": 0.516582727432251, "step": 3970 }, { "epoch": 1.399295154185022, "grad_norm": 1.6087287767761917, "learning_rate": 4.5464448800850366e-06, "loss": 0.6324976682662964, "step": 3971 }, { "epoch": 1.399647577092511, "grad_norm": 1.660721417089598, "learning_rate": 4.541562092757918e-06, "loss": 0.5926251411437988, "step": 3972 }, { "epoch": 1.4, "grad_norm": 1.7443423550845751, "learning_rate": 4.536681158400598e-06, "loss": 0.5677082538604736, "step": 3973 }, { "epoch": 1.400352422907489, "grad_norm": 1.791823926745788, "learning_rate": 4.531802078669997e-06, "loss": 0.5267887115478516, "step": 3974 }, { "epoch": 1.400704845814978, "grad_norm": 2.3840846637544617, "learning_rate": 4.526924855222411e-06, "loss": 0.6361796855926514, "step": 3975 }, { "epoch": 1.401057268722467, "grad_norm": 1.9992656380929168, "learning_rate": 4.522049489713513e-06, "loss": 0.5906916856765747, "step": 3976 }, { "epoch": 1.4014096916299559, "grad_norm": 1.932616358578933, "learning_rate": 4.517175983798334e-06, "loss": 0.647320568561554, "step": 3977 }, { "epoch": 1.401762114537445, "grad_norm": 1.7297380971513312, "learning_rate": 4.512304339131271e-06, "loss": 0.6129240989685059, "step": 3978 }, { "epoch": 1.402114537444934, "grad_norm": 1.8820056515419912, "learning_rate": 4.507434557366106e-06, "loss": 0.5550417900085449, "step": 3979 }, { "epoch": 1.402466960352423, "grad_norm": 3.410101687197828, "learning_rate": 4.502566640155972e-06, "loss": 0.5677829384803772, "step": 3980 }, { "epoch": 1.4028193832599118, "grad_norm": 2.037826582552855, "learning_rate": 4.497700589153379e-06, "loss": 0.6627114415168762, "step": 3981 }, { "epoch": 1.4031718061674008, "grad_norm": 2.0278559165710197, "learning_rate": 4.492836406010197e-06, "loss": 0.7225712537765503, "step": 3982 }, { "epoch": 1.40352422907489, "grad_norm": 1.6877243893704514, "learning_rate": 4.487974092377661e-06, "loss": 0.5259708762168884, "step": 3983 }, { "epoch": 1.4038766519823789, "grad_norm": 1.930838228409862, "learning_rate": 4.4831136499063856e-06, "loss": 0.5509500503540039, "step": 3984 }, { "epoch": 1.4042290748898678, "grad_norm": 1.862328702111506, "learning_rate": 4.478255080246338e-06, "loss": 0.5436242818832397, "step": 3985 }, { "epoch": 1.4045814977973567, "grad_norm": 1.9252586062101578, "learning_rate": 4.473398385046839e-06, "loss": 0.591008186340332, "step": 3986 }, { "epoch": 1.404933920704846, "grad_norm": 1.8551590253300663, "learning_rate": 4.4685435659565975e-06, "loss": 0.7463438510894775, "step": 3987 }, { "epoch": 1.4052863436123348, "grad_norm": 2.7212267236094445, "learning_rate": 4.46369062462367e-06, "loss": 0.5672414898872375, "step": 3988 }, { "epoch": 1.4056387665198238, "grad_norm": 1.9023461618951703, "learning_rate": 4.458839562695481e-06, "loss": 0.6022762060165405, "step": 3989 }, { "epoch": 1.4059911894273127, "grad_norm": 2.975414442801074, "learning_rate": 4.453990381818811e-06, "loss": 0.8312792181968689, "step": 3990 }, { "epoch": 1.4063436123348017, "grad_norm": 1.5291152049255947, "learning_rate": 4.4491430836398055e-06, "loss": 0.475655198097229, "step": 3991 }, { "epoch": 1.4066960352422908, "grad_norm": 2.205738960261052, "learning_rate": 4.444297669803981e-06, "loss": 0.6317172050476074, "step": 3992 }, { "epoch": 1.4070484581497797, "grad_norm": 1.7590033801874774, "learning_rate": 4.439454141956194e-06, "loss": 0.5412036180496216, "step": 3993 }, { "epoch": 1.4074008810572687, "grad_norm": 1.8534848369039538, "learning_rate": 4.434612501740671e-06, "loss": 0.6401170492172241, "step": 3994 }, { "epoch": 1.4077533039647578, "grad_norm": 1.6819739888663638, "learning_rate": 4.429772750801007e-06, "loss": 0.6175628900527954, "step": 3995 }, { "epoch": 1.4081057268722468, "grad_norm": 1.9863542351176011, "learning_rate": 4.424934890780142e-06, "loss": 0.6875946521759033, "step": 3996 }, { "epoch": 1.4084581497797357, "grad_norm": 1.6357928529424866, "learning_rate": 4.420098923320378e-06, "loss": 0.6404017210006714, "step": 3997 }, { "epoch": 1.4088105726872246, "grad_norm": 2.096371594852834, "learning_rate": 4.415264850063378e-06, "loss": 0.7569783329963684, "step": 3998 }, { "epoch": 1.4091629955947136, "grad_norm": 1.9373448832520324, "learning_rate": 4.410432672650154e-06, "loss": 0.6125228404998779, "step": 3999 }, { "epoch": 1.4095154185022025, "grad_norm": 1.8206271046178746, "learning_rate": 4.405602392721091e-06, "loss": 0.6187582015991211, "step": 4000 }, { "epoch": 1.4098678414096917, "grad_norm": 1.6622405329305723, "learning_rate": 4.400774011915907e-06, "loss": 0.6086148023605347, "step": 4001 }, { "epoch": 1.4102202643171806, "grad_norm": 1.4174012456939833, "learning_rate": 4.3959475318736885e-06, "loss": 0.4140232801437378, "step": 4002 }, { "epoch": 1.4105726872246696, "grad_norm": 1.836512159334361, "learning_rate": 4.391122954232883e-06, "loss": 0.5065237879753113, "step": 4003 }, { "epoch": 1.4109251101321587, "grad_norm": 1.458932644295331, "learning_rate": 4.386300280631279e-06, "loss": 0.4817734658718109, "step": 4004 }, { "epoch": 1.4112775330396476, "grad_norm": 1.6662288245729417, "learning_rate": 4.381479512706025e-06, "loss": 0.6339706778526306, "step": 4005 }, { "epoch": 1.4116299559471366, "grad_norm": 2.1459595089971653, "learning_rate": 4.376660652093621e-06, "loss": 0.6581720113754272, "step": 4006 }, { "epoch": 1.4119823788546255, "grad_norm": 2.1052256395432503, "learning_rate": 4.3718437004299174e-06, "loss": 0.722156286239624, "step": 4007 }, { "epoch": 1.4123348017621145, "grad_norm": 2.007137048045836, "learning_rate": 4.36702865935013e-06, "loss": 0.5262913703918457, "step": 4008 }, { "epoch": 1.4126872246696034, "grad_norm": 1.6239575731802327, "learning_rate": 4.362215530488805e-06, "loss": 0.6242132186889648, "step": 4009 }, { "epoch": 1.4130396475770926, "grad_norm": 1.6412038783326008, "learning_rate": 4.35740431547985e-06, "loss": 0.48776593804359436, "step": 4010 }, { "epoch": 1.4133920704845815, "grad_norm": 1.4539922592281447, "learning_rate": 4.352595015956528e-06, "loss": 0.5528746843338013, "step": 4011 }, { "epoch": 1.4137444933920704, "grad_norm": 1.881555645901769, "learning_rate": 4.347787633551444e-06, "loss": 0.6282942295074463, "step": 4012 }, { "epoch": 1.4140969162995596, "grad_norm": 1.997464157113011, "learning_rate": 4.342982169896555e-06, "loss": 0.6113284826278687, "step": 4013 }, { "epoch": 1.4144493392070485, "grad_norm": 1.696170493669202, "learning_rate": 4.3381786266231685e-06, "loss": 0.5756875872612, "step": 4014 }, { "epoch": 1.4148017621145375, "grad_norm": 1.8012350757266906, "learning_rate": 4.333377005361931e-06, "loss": 0.6180154085159302, "step": 4015 }, { "epoch": 1.4151541850220264, "grad_norm": 2.2454634074572146, "learning_rate": 4.328577307742855e-06, "loss": 0.5728827118873596, "step": 4016 }, { "epoch": 1.4155066079295153, "grad_norm": 1.7928891595746113, "learning_rate": 4.323779535395278e-06, "loss": 0.5248062014579773, "step": 4017 }, { "epoch": 1.4158590308370045, "grad_norm": 1.7454680737255013, "learning_rate": 4.318983689947895e-06, "loss": 0.5938228368759155, "step": 4018 }, { "epoch": 1.4162114537444934, "grad_norm": 1.8931460456480809, "learning_rate": 4.3141897730287544e-06, "loss": 0.7085045576095581, "step": 4019 }, { "epoch": 1.4165638766519824, "grad_norm": 2.566425134177144, "learning_rate": 4.309397786265235e-06, "loss": 0.599969744682312, "step": 4020 }, { "epoch": 1.4169162995594713, "grad_norm": 2.186511304730039, "learning_rate": 4.30460773128407e-06, "loss": 0.5784738063812256, "step": 4021 }, { "epoch": 1.4172687224669605, "grad_norm": 1.8802349185240168, "learning_rate": 4.299819609711332e-06, "loss": 0.6492793560028076, "step": 4022 }, { "epoch": 1.4176211453744494, "grad_norm": 1.6886854891683005, "learning_rate": 4.2950334231724375e-06, "loss": 0.6690749526023865, "step": 4023 }, { "epoch": 1.4179735682819383, "grad_norm": 1.8482135160791267, "learning_rate": 4.290249173292158e-06, "loss": 0.5919139981269836, "step": 4024 }, { "epoch": 1.4183259911894273, "grad_norm": 1.6202611135629348, "learning_rate": 4.285466861694583e-06, "loss": 0.5661630630493164, "step": 4025 }, { "epoch": 1.4186784140969162, "grad_norm": 1.7328062744712673, "learning_rate": 4.280686490003169e-06, "loss": 0.547730565071106, "step": 4026 }, { "epoch": 1.4190308370044054, "grad_norm": 1.7270546788274348, "learning_rate": 4.2759080598406985e-06, "loss": 0.6150445938110352, "step": 4027 }, { "epoch": 1.4193832599118943, "grad_norm": 2.048539568947664, "learning_rate": 4.271131572829303e-06, "loss": 0.6522917747497559, "step": 4028 }, { "epoch": 1.4197356828193832, "grad_norm": 1.952118534937186, "learning_rate": 4.266357030590449e-06, "loss": 0.8456230163574219, "step": 4029 }, { "epoch": 1.4200881057268724, "grad_norm": 1.810792149813479, "learning_rate": 4.261584434744945e-06, "loss": 0.6059526801109314, "step": 4030 }, { "epoch": 1.4204405286343613, "grad_norm": 1.8213808222910857, "learning_rate": 4.256813786912937e-06, "loss": 0.6289907693862915, "step": 4031 }, { "epoch": 1.4207929515418503, "grad_norm": 1.5510911353998291, "learning_rate": 4.252045088713919e-06, "loss": 0.48954638838768005, "step": 4032 }, { "epoch": 1.4211453744493392, "grad_norm": 2.020061779490103, "learning_rate": 4.2472783417667055e-06, "loss": 0.6999461054801941, "step": 4033 }, { "epoch": 1.4214977973568281, "grad_norm": 1.9629261898681878, "learning_rate": 4.242513547689466e-06, "loss": 0.5610899925231934, "step": 4034 }, { "epoch": 1.421850220264317, "grad_norm": 1.8415242379631616, "learning_rate": 4.237750708099697e-06, "loss": 0.6240172386169434, "step": 4035 }, { "epoch": 1.4222026431718062, "grad_norm": 1.887889822972652, "learning_rate": 4.2329898246142356e-06, "loss": 0.6368240118026733, "step": 4036 }, { "epoch": 1.4225550660792952, "grad_norm": 2.0839652521207483, "learning_rate": 4.228230898849253e-06, "loss": 0.6242600679397583, "step": 4037 }, { "epoch": 1.422907488986784, "grad_norm": 1.7622749957844728, "learning_rate": 4.223473932420255e-06, "loss": 0.6446138620376587, "step": 4038 }, { "epoch": 1.4232599118942733, "grad_norm": 1.8800444061446486, "learning_rate": 4.218718926942081e-06, "loss": 0.7108229398727417, "step": 4039 }, { "epoch": 1.4236123348017622, "grad_norm": 1.7917659431298882, "learning_rate": 4.213965884028919e-06, "loss": 0.5279660820960999, "step": 4040 }, { "epoch": 1.4239647577092511, "grad_norm": 1.7747691200912903, "learning_rate": 4.209214805294264e-06, "loss": 0.6422853469848633, "step": 4041 }, { "epoch": 1.42431718061674, "grad_norm": 1.848339978168105, "learning_rate": 4.2044656923509704e-06, "loss": 0.6355602741241455, "step": 4042 }, { "epoch": 1.424669603524229, "grad_norm": 1.7787421175687093, "learning_rate": 4.19971854681121e-06, "loss": 0.5351370573043823, "step": 4043 }, { "epoch": 1.425022026431718, "grad_norm": 2.0300248809256987, "learning_rate": 4.194973370286492e-06, "loss": 0.5573978424072266, "step": 4044 }, { "epoch": 1.425374449339207, "grad_norm": 1.9433750628346875, "learning_rate": 4.1902301643876555e-06, "loss": 0.5865412950515747, "step": 4045 }, { "epoch": 1.425726872246696, "grad_norm": 2.102324249123369, "learning_rate": 4.185488930724868e-06, "loss": 0.6231919527053833, "step": 4046 }, { "epoch": 1.426079295154185, "grad_norm": 1.7135783989067233, "learning_rate": 4.180749670907638e-06, "loss": 0.48964112997055054, "step": 4047 }, { "epoch": 1.4264317180616741, "grad_norm": 2.0973459527664686, "learning_rate": 4.176012386544796e-06, "loss": 0.6299121975898743, "step": 4048 }, { "epoch": 1.426784140969163, "grad_norm": 1.7239115182277114, "learning_rate": 4.171277079244492e-06, "loss": 0.5612789392471313, "step": 4049 }, { "epoch": 1.427136563876652, "grad_norm": 1.7396324571675468, "learning_rate": 4.166543750614227e-06, "loss": 0.4315321147441864, "step": 4050 }, { "epoch": 1.427488986784141, "grad_norm": 2.0031203112343094, "learning_rate": 4.1618124022608146e-06, "loss": 0.6300851702690125, "step": 4051 }, { "epoch": 1.4278414096916299, "grad_norm": 1.7808675185736187, "learning_rate": 4.1570830357904e-06, "loss": 0.6258795261383057, "step": 4052 }, { "epoch": 1.4281938325991188, "grad_norm": 1.9069027085637078, "learning_rate": 4.152355652808457e-06, "loss": 0.7364479303359985, "step": 4053 }, { "epoch": 1.428546255506608, "grad_norm": 1.8474323145969993, "learning_rate": 4.147630254919781e-06, "loss": 0.44845038652420044, "step": 4054 }, { "epoch": 1.428898678414097, "grad_norm": 1.6823890398766386, "learning_rate": 4.142906843728504e-06, "loss": 0.516815185546875, "step": 4055 }, { "epoch": 1.4292511013215858, "grad_norm": 1.6276914964492604, "learning_rate": 4.138185420838079e-06, "loss": 0.6296960711479187, "step": 4056 }, { "epoch": 1.429603524229075, "grad_norm": 1.728227730408027, "learning_rate": 4.133465987851269e-06, "loss": 0.5709103345870972, "step": 4057 }, { "epoch": 1.429955947136564, "grad_norm": 1.7709951139170081, "learning_rate": 4.128748546370186e-06, "loss": 0.5672547817230225, "step": 4058 }, { "epoch": 1.4303083700440529, "grad_norm": 1.8161591736426752, "learning_rate": 4.124033097996252e-06, "loss": 0.5927014350891113, "step": 4059 }, { "epoch": 1.4306607929515418, "grad_norm": 1.75056683772296, "learning_rate": 4.119319644330214e-06, "loss": 0.7021238803863525, "step": 4060 }, { "epoch": 1.4310132158590307, "grad_norm": 1.7949933259038664, "learning_rate": 4.114608186972143e-06, "loss": 0.5940784215927124, "step": 4061 }, { "epoch": 1.43136563876652, "grad_norm": 1.7958424742287702, "learning_rate": 4.109898727521429e-06, "loss": 0.46511122584342957, "step": 4062 }, { "epoch": 1.4317180616740088, "grad_norm": 1.7489789285307085, "learning_rate": 4.105191267576797e-06, "loss": 0.4710976481437683, "step": 4063 }, { "epoch": 1.4320704845814978, "grad_norm": 1.650142742870973, "learning_rate": 4.100485808736273e-06, "loss": 0.5947977900505066, "step": 4064 }, { "epoch": 1.4324229074889867, "grad_norm": 1.7620222249444284, "learning_rate": 4.095782352597214e-06, "loss": 0.6312115788459778, "step": 4065 }, { "epoch": 1.4327753303964759, "grad_norm": 1.7689711305484843, "learning_rate": 4.091080900756303e-06, "loss": 0.5709977149963379, "step": 4066 }, { "epoch": 1.4331277533039648, "grad_norm": 1.8903042666510779, "learning_rate": 4.086381454809535e-06, "loss": 0.6183716058731079, "step": 4067 }, { "epoch": 1.4334801762114537, "grad_norm": 1.8677159370638265, "learning_rate": 4.081684016352223e-06, "loss": 0.5139745473861694, "step": 4068 }, { "epoch": 1.4338325991189427, "grad_norm": 1.9141879794373917, "learning_rate": 4.076988586979004e-06, "loss": 0.6611173152923584, "step": 4069 }, { "epoch": 1.4341850220264316, "grad_norm": 1.7276457807578136, "learning_rate": 4.072295168283824e-06, "loss": 0.616943359375, "step": 4070 }, { "epoch": 1.4345374449339208, "grad_norm": 2.331740237042665, "learning_rate": 4.067603761859965e-06, "loss": 0.5388625264167786, "step": 4071 }, { "epoch": 1.4348898678414097, "grad_norm": 1.9571975377572324, "learning_rate": 4.062914369300002e-06, "loss": 0.5523884892463684, "step": 4072 }, { "epoch": 1.4352422907488986, "grad_norm": 1.8860165198416616, "learning_rate": 4.058226992195838e-06, "loss": 0.5610285997390747, "step": 4073 }, { "epoch": 1.4355947136563878, "grad_norm": 1.8522832262316333, "learning_rate": 4.0535416321387e-06, "loss": 0.583917498588562, "step": 4074 }, { "epoch": 1.4359471365638767, "grad_norm": 1.677482186323321, "learning_rate": 4.048858290719115e-06, "loss": 0.6025276184082031, "step": 4075 }, { "epoch": 1.4362995594713657, "grad_norm": 1.8037188167117204, "learning_rate": 4.044176969526936e-06, "loss": 0.5643888711929321, "step": 4076 }, { "epoch": 1.4366519823788546, "grad_norm": 1.709713655992042, "learning_rate": 4.0394976701513235e-06, "loss": 0.550167977809906, "step": 4077 }, { "epoch": 1.4370044052863435, "grad_norm": 2.1319034629476747, "learning_rate": 4.034820394180749e-06, "loss": 0.6182876825332642, "step": 4078 }, { "epoch": 1.4373568281938325, "grad_norm": 2.018408244379198, "learning_rate": 4.030145143203016e-06, "loss": 0.5197434425354004, "step": 4079 }, { "epoch": 1.4377092511013216, "grad_norm": 2.037308833831004, "learning_rate": 4.025471918805214e-06, "loss": 0.5351034998893738, "step": 4080 }, { "epoch": 1.4380616740088106, "grad_norm": 1.988655670021041, "learning_rate": 4.020800722573758e-06, "loss": 0.5576729774475098, "step": 4081 }, { "epoch": 1.4384140969162995, "grad_norm": 2.03830396836609, "learning_rate": 4.016131556094381e-06, "loss": 0.5557611584663391, "step": 4082 }, { "epoch": 1.4387665198237887, "grad_norm": 1.6841558782049018, "learning_rate": 4.011464420952115e-06, "loss": 0.5300010442733765, "step": 4083 }, { "epoch": 1.4391189427312776, "grad_norm": 2.5196291624747387, "learning_rate": 4.0067993187313085e-06, "loss": 0.5254991054534912, "step": 4084 }, { "epoch": 1.4394713656387665, "grad_norm": 1.9569129587138865, "learning_rate": 4.002136251015617e-06, "loss": 0.5044848322868347, "step": 4085 }, { "epoch": 1.4398237885462555, "grad_norm": 1.7587820286029368, "learning_rate": 3.997475219388005e-06, "loss": 0.6422302722930908, "step": 4086 }, { "epoch": 1.4401762114537444, "grad_norm": 1.7785161433093049, "learning_rate": 3.992816225430758e-06, "loss": 0.5502497553825378, "step": 4087 }, { "epoch": 1.4405286343612334, "grad_norm": 1.9272648866171629, "learning_rate": 3.988159270725448e-06, "loss": 0.7479537129402161, "step": 4088 }, { "epoch": 1.4408810572687225, "grad_norm": 1.8882665464741835, "learning_rate": 3.983504356852967e-06, "loss": 0.5418091416358948, "step": 4089 }, { "epoch": 1.4412334801762114, "grad_norm": 2.1909054908738805, "learning_rate": 3.978851485393519e-06, "loss": 0.5262568593025208, "step": 4090 }, { "epoch": 1.4415859030837004, "grad_norm": 1.7855475608149034, "learning_rate": 3.974200657926607e-06, "loss": 0.5419692397117615, "step": 4091 }, { "epoch": 1.4419383259911895, "grad_norm": 1.84908442821801, "learning_rate": 3.9695518760310425e-06, "loss": 0.5202164649963379, "step": 4092 }, { "epoch": 1.4422907488986785, "grad_norm": 1.6256093479781946, "learning_rate": 3.96490514128494e-06, "loss": 0.7232608795166016, "step": 4093 }, { "epoch": 1.4426431718061674, "grad_norm": 3.2107784732452473, "learning_rate": 3.960260455265721e-06, "loss": 0.5899156332015991, "step": 4094 }, { "epoch": 1.4429955947136563, "grad_norm": 1.9995831956411032, "learning_rate": 3.95561781955012e-06, "loss": 0.629068911075592, "step": 4095 }, { "epoch": 1.4433480176211453, "grad_norm": 1.9520751138167456, "learning_rate": 3.950977235714154e-06, "loss": 0.5584920644760132, "step": 4096 }, { "epoch": 1.4437004405286344, "grad_norm": 1.7280125643736322, "learning_rate": 3.9463387053331685e-06, "loss": 0.713936984539032, "step": 4097 }, { "epoch": 1.4440528634361234, "grad_norm": 2.7226452019662357, "learning_rate": 3.9417022299817944e-06, "loss": 0.7157076001167297, "step": 4098 }, { "epoch": 1.4444052863436123, "grad_norm": 1.940369638230087, "learning_rate": 3.937067811233972e-06, "loss": 0.6540844440460205, "step": 4099 }, { "epoch": 1.4447577092511013, "grad_norm": 1.6342043838390767, "learning_rate": 3.9324354506629425e-06, "loss": 0.5350022315979004, "step": 4100 }, { "epoch": 1.4451101321585904, "grad_norm": 1.9186113150470587, "learning_rate": 3.9278051498412475e-06, "loss": 0.6852695941925049, "step": 4101 }, { "epoch": 1.4454625550660793, "grad_norm": 1.8060312138879744, "learning_rate": 3.923176910340728e-06, "loss": 0.6059536337852478, "step": 4102 }, { "epoch": 1.4458149779735683, "grad_norm": 1.6721278909458728, "learning_rate": 3.918550733732536e-06, "loss": 0.5787979364395142, "step": 4103 }, { "epoch": 1.4461674008810572, "grad_norm": 1.8059605647431092, "learning_rate": 3.9139266215871e-06, "loss": 0.6068835258483887, "step": 4104 }, { "epoch": 1.4465198237885462, "grad_norm": 1.7804694224195132, "learning_rate": 3.909304575474175e-06, "loss": 0.5123663544654846, "step": 4105 }, { "epoch": 1.4468722466960353, "grad_norm": 1.832785857954117, "learning_rate": 3.9046845969627975e-06, "loss": 0.6285420656204224, "step": 4106 }, { "epoch": 1.4472246696035242, "grad_norm": 1.8029701329975896, "learning_rate": 3.9000666876213056e-06, "loss": 0.6186035871505737, "step": 4107 }, { "epoch": 1.4475770925110132, "grad_norm": 2.8121411727628174, "learning_rate": 3.895450849017336e-06, "loss": 0.6222661733627319, "step": 4108 }, { "epoch": 1.4479295154185021, "grad_norm": 1.7965214044078308, "learning_rate": 3.890837082717822e-06, "loss": 0.5889515280723572, "step": 4109 }, { "epoch": 1.4482819383259913, "grad_norm": 1.8839124618745182, "learning_rate": 3.8862253902889925e-06, "loss": 0.6160309314727783, "step": 4110 }, { "epoch": 1.4486343612334802, "grad_norm": 1.7651875927016676, "learning_rate": 3.881615773296381e-06, "loss": 0.48093074560165405, "step": 4111 }, { "epoch": 1.4489867841409692, "grad_norm": 1.8283039880345147, "learning_rate": 3.877008233304796e-06, "loss": 0.5851131677627563, "step": 4112 }, { "epoch": 1.449339207048458, "grad_norm": 1.7366010221761805, "learning_rate": 3.872402771878365e-06, "loss": 0.5322093963623047, "step": 4113 }, { "epoch": 1.449691629955947, "grad_norm": 1.7342840660368584, "learning_rate": 3.8677993905804956e-06, "loss": 0.652804970741272, "step": 4114 }, { "epoch": 1.4500440528634362, "grad_norm": 1.9583669696557284, "learning_rate": 3.863198090973891e-06, "loss": 0.5494996309280396, "step": 4115 }, { "epoch": 1.4503964757709251, "grad_norm": 1.910811405312081, "learning_rate": 3.8585988746205505e-06, "loss": 0.5641331672668457, "step": 4116 }, { "epoch": 1.450748898678414, "grad_norm": 1.7616537450177998, "learning_rate": 3.854001743081764e-06, "loss": 0.5415998697280884, "step": 4117 }, { "epoch": 1.4511013215859032, "grad_norm": 1.599490372210091, "learning_rate": 3.849406697918113e-06, "loss": 0.4399813711643219, "step": 4118 }, { "epoch": 1.4514537444933922, "grad_norm": 2.0642862733318115, "learning_rate": 3.84481374068948e-06, "loss": 0.6228655576705933, "step": 4119 }, { "epoch": 1.451806167400881, "grad_norm": 1.650547077673145, "learning_rate": 3.8402228729550195e-06, "loss": 0.5575108528137207, "step": 4120 }, { "epoch": 1.45215859030837, "grad_norm": 2.4780057667058704, "learning_rate": 3.835634096273197e-06, "loss": 0.5705434679985046, "step": 4121 }, { "epoch": 1.452511013215859, "grad_norm": 2.1620556917486184, "learning_rate": 3.831047412201758e-06, "loss": 0.5649456977844238, "step": 4122 }, { "epoch": 1.452863436123348, "grad_norm": 1.9734169166383557, "learning_rate": 3.826462822297736e-06, "loss": 0.5656554698944092, "step": 4123 }, { "epoch": 1.453215859030837, "grad_norm": 1.8883507101257415, "learning_rate": 3.82188032811746e-06, "loss": 0.6565591096878052, "step": 4124 }, { "epoch": 1.453568281938326, "grad_norm": 1.9823635297408013, "learning_rate": 3.817299931216537e-06, "loss": 0.6553423404693604, "step": 4125 }, { "epoch": 1.453920704845815, "grad_norm": 1.8362785094722764, "learning_rate": 3.812721633149883e-06, "loss": 0.5401671528816223, "step": 4126 }, { "epoch": 1.454273127753304, "grad_norm": 2.008049720412482, "learning_rate": 3.808145435471674e-06, "loss": 0.7275381088256836, "step": 4127 }, { "epoch": 1.454625550660793, "grad_norm": 1.827455905179675, "learning_rate": 3.80357133973539e-06, "loss": 0.6384310722351074, "step": 4128 }, { "epoch": 1.454977973568282, "grad_norm": 1.986935058055083, "learning_rate": 3.7989993474937993e-06, "loss": 0.7783250212669373, "step": 4129 }, { "epoch": 1.455330396475771, "grad_norm": 2.1923612655628624, "learning_rate": 3.7944294602989473e-06, "loss": 0.752954363822937, "step": 4130 }, { "epoch": 1.4556828193832598, "grad_norm": 1.801491937261316, "learning_rate": 3.789861679702169e-06, "loss": 0.6099411249160767, "step": 4131 }, { "epoch": 1.4560352422907488, "grad_norm": 2.12230143233965, "learning_rate": 3.7852960072540845e-06, "loss": 0.6608012914657593, "step": 4132 }, { "epoch": 1.456387665198238, "grad_norm": 1.836228759881875, "learning_rate": 3.7807324445045924e-06, "loss": 0.5119853615760803, "step": 4133 }, { "epoch": 1.4567400881057269, "grad_norm": 2.036719543857632, "learning_rate": 3.7761709930028923e-06, "loss": 0.6353520750999451, "step": 4134 }, { "epoch": 1.4570925110132158, "grad_norm": 1.9234147822597618, "learning_rate": 3.7716116542974434e-06, "loss": 0.6427614688873291, "step": 4135 }, { "epoch": 1.457444933920705, "grad_norm": 2.34139645382815, "learning_rate": 3.7670544299360003e-06, "loss": 0.6205203533172607, "step": 4136 }, { "epoch": 1.457797356828194, "grad_norm": 1.940401751978381, "learning_rate": 3.7624993214656046e-06, "loss": 0.5957762002944946, "step": 4137 }, { "epoch": 1.4581497797356828, "grad_norm": 1.8842452122457418, "learning_rate": 3.7579463304325714e-06, "loss": 0.6698043346405029, "step": 4138 }, { "epoch": 1.4585022026431718, "grad_norm": 1.825534553754035, "learning_rate": 3.7533954583824982e-06, "loss": 0.5947796106338501, "step": 4139 }, { "epoch": 1.4588546255506607, "grad_norm": 1.7817965501913557, "learning_rate": 3.7488467068602664e-06, "loss": 0.5905463695526123, "step": 4140 }, { "epoch": 1.4592070484581499, "grad_norm": 1.8530726474927524, "learning_rate": 3.74430007741003e-06, "loss": 0.6218722462654114, "step": 4141 }, { "epoch": 1.4595594713656388, "grad_norm": 1.9872212615104103, "learning_rate": 3.739755571575241e-06, "loss": 0.6124013066291809, "step": 4142 }, { "epoch": 1.4599118942731277, "grad_norm": 1.8897226451904012, "learning_rate": 3.7352131908986046e-06, "loss": 0.5816842317581177, "step": 4143 }, { "epoch": 1.4602643171806167, "grad_norm": 1.780742815029414, "learning_rate": 3.7306729369221197e-06, "loss": 0.5225531458854675, "step": 4144 }, { "epoch": 1.4606167400881058, "grad_norm": 1.5899946748394236, "learning_rate": 3.7261348111870663e-06, "loss": 0.4536696672439575, "step": 4145 }, { "epoch": 1.4609691629955948, "grad_norm": 1.6530094281559282, "learning_rate": 3.7215988152339968e-06, "loss": 0.5777832269668579, "step": 4146 }, { "epoch": 1.4613215859030837, "grad_norm": 2.0042576738233993, "learning_rate": 3.717064950602737e-06, "loss": 0.5964622497558594, "step": 4147 }, { "epoch": 1.4616740088105726, "grad_norm": 1.634683701176406, "learning_rate": 3.7125332188323937e-06, "loss": 0.47224390506744385, "step": 4148 }, { "epoch": 1.4620264317180616, "grad_norm": 2.0219703130043474, "learning_rate": 3.708003621461347e-06, "loss": 0.5989271402359009, "step": 4149 }, { "epoch": 1.4623788546255507, "grad_norm": 1.7865027495889427, "learning_rate": 3.7034761600272627e-06, "loss": 0.6171919107437134, "step": 4150 }, { "epoch": 1.4627312775330397, "grad_norm": 1.742831115289917, "learning_rate": 3.6989508360670647e-06, "loss": 0.7064418792724609, "step": 4151 }, { "epoch": 1.4630837004405286, "grad_norm": 2.236539087690149, "learning_rate": 3.6944276511169577e-06, "loss": 0.6055941581726074, "step": 4152 }, { "epoch": 1.4634361233480178, "grad_norm": 1.7433765587507288, "learning_rate": 3.689906606712429e-06, "loss": 0.4550645351409912, "step": 4153 }, { "epoch": 1.4637885462555067, "grad_norm": 1.929810725161399, "learning_rate": 3.68538770438823e-06, "loss": 0.5958502292633057, "step": 4154 }, { "epoch": 1.4641409691629956, "grad_norm": 2.057185852502653, "learning_rate": 3.680870945678389e-06, "loss": 0.5574297904968262, "step": 4155 }, { "epoch": 1.4644933920704846, "grad_norm": 1.443537567568116, "learning_rate": 3.676356332116202e-06, "loss": 0.46494683623313904, "step": 4156 }, { "epoch": 1.4648458149779735, "grad_norm": 1.7341220293452018, "learning_rate": 3.671843865234238e-06, "loss": 0.5549977421760559, "step": 4157 }, { "epoch": 1.4651982378854624, "grad_norm": 1.7585158502615206, "learning_rate": 3.6673335465643488e-06, "loss": 0.5620779395103455, "step": 4158 }, { "epoch": 1.4655506607929516, "grad_norm": 1.845448976603358, "learning_rate": 3.662825377637638e-06, "loss": 0.5945389270782471, "step": 4159 }, { "epoch": 1.4659030837004405, "grad_norm": 1.9218401758762256, "learning_rate": 3.6583193599844867e-06, "loss": 0.6923668384552002, "step": 4160 }, { "epoch": 1.4662555066079295, "grad_norm": 2.16834694145402, "learning_rate": 3.653815495134557e-06, "loss": 0.6848515868186951, "step": 4161 }, { "epoch": 1.4666079295154186, "grad_norm": 1.922504159473904, "learning_rate": 3.649313784616765e-06, "loss": 0.640125036239624, "step": 4162 }, { "epoch": 1.4669603524229076, "grad_norm": 1.816415927402479, "learning_rate": 3.6448142299593026e-06, "loss": 0.6879653930664062, "step": 4163 }, { "epoch": 1.4673127753303965, "grad_norm": 1.9534619637738762, "learning_rate": 3.6403168326896286e-06, "loss": 0.6757794618606567, "step": 4164 }, { "epoch": 1.4676651982378854, "grad_norm": 1.7476054801499117, "learning_rate": 3.6358215943344664e-06, "loss": 0.6405826807022095, "step": 4165 }, { "epoch": 1.4680176211453744, "grad_norm": 2.1448885390527064, "learning_rate": 3.6313285164198187e-06, "loss": 0.692136287689209, "step": 4166 }, { "epoch": 1.4683700440528633, "grad_norm": 1.8449983835752888, "learning_rate": 3.626837600470935e-06, "loss": 0.6305568218231201, "step": 4167 }, { "epoch": 1.4687224669603525, "grad_norm": 2.1026866185280264, "learning_rate": 3.6223488480123427e-06, "loss": 0.7040449380874634, "step": 4168 }, { "epoch": 1.4690748898678414, "grad_norm": 1.5463095765444386, "learning_rate": 3.6178622605678403e-06, "loss": 0.5064427852630615, "step": 4169 }, { "epoch": 1.4694273127753303, "grad_norm": 1.6549157120829303, "learning_rate": 3.6133778396604813e-06, "loss": 0.46597155928611755, "step": 4170 }, { "epoch": 1.4697797356828195, "grad_norm": 1.9774014610728103, "learning_rate": 3.6088955868125864e-06, "loss": 0.5764908790588379, "step": 4171 }, { "epoch": 1.4701321585903084, "grad_norm": 1.9347279402338318, "learning_rate": 3.6044155035457405e-06, "loss": 0.5808656215667725, "step": 4172 }, { "epoch": 1.4704845814977974, "grad_norm": 2.0116811142135202, "learning_rate": 3.599937591380791e-06, "loss": 0.5439287424087524, "step": 4173 }, { "epoch": 1.4708370044052863, "grad_norm": 1.5674669602592264, "learning_rate": 3.595461851837857e-06, "loss": 0.5943965911865234, "step": 4174 }, { "epoch": 1.4711894273127752, "grad_norm": 1.8847509954427386, "learning_rate": 3.590988286436302e-06, "loss": 0.631833016872406, "step": 4175 }, { "epoch": 1.4715418502202642, "grad_norm": 1.9232774716266652, "learning_rate": 3.5865168966947718e-06, "loss": 0.514176070690155, "step": 4176 }, { "epoch": 1.4718942731277533, "grad_norm": 1.7211351925277203, "learning_rate": 3.582047684131159e-06, "loss": 0.584772527217865, "step": 4177 }, { "epoch": 1.4722466960352423, "grad_norm": 1.7726013207799318, "learning_rate": 3.5775806502626244e-06, "loss": 0.5085974931716919, "step": 4178 }, { "epoch": 1.4725991189427312, "grad_norm": 2.1422494719737464, "learning_rate": 3.573115796605584e-06, "loss": 0.62562495470047, "step": 4179 }, { "epoch": 1.4729515418502204, "grad_norm": 1.9507950967896, "learning_rate": 3.5686531246757206e-06, "loss": 0.5815824270248413, "step": 4180 }, { "epoch": 1.4733039647577093, "grad_norm": 1.8811159721586839, "learning_rate": 3.5641926359879663e-06, "loss": 0.6639705300331116, "step": 4181 }, { "epoch": 1.4736563876651982, "grad_norm": 1.8978737039698366, "learning_rate": 3.5597343320565293e-06, "loss": 0.6265684962272644, "step": 4182 }, { "epoch": 1.4740088105726872, "grad_norm": 1.877895350809495, "learning_rate": 3.5552782143948504e-06, "loss": 0.6113626956939697, "step": 4183 }, { "epoch": 1.4743612334801761, "grad_norm": 1.8492974346484832, "learning_rate": 3.550824284515655e-06, "loss": 0.5247244834899902, "step": 4184 }, { "epoch": 1.4747136563876653, "grad_norm": 1.871370335191458, "learning_rate": 3.5463725439309083e-06, "loss": 0.5524521470069885, "step": 4185 }, { "epoch": 1.4750660792951542, "grad_norm": 1.9955136913094453, "learning_rate": 3.5419229941518384e-06, "loss": 0.5462251305580139, "step": 4186 }, { "epoch": 1.4754185022026431, "grad_norm": 1.6609337480864497, "learning_rate": 3.5374756366889297e-06, "loss": 0.6500638723373413, "step": 4187 }, { "epoch": 1.475770925110132, "grad_norm": 2.0744035602538586, "learning_rate": 3.5330304730519216e-06, "loss": 0.4445904791355133, "step": 4188 }, { "epoch": 1.4761233480176212, "grad_norm": 1.7788816335434026, "learning_rate": 3.5285875047498075e-06, "loss": 0.6068017482757568, "step": 4189 }, { "epoch": 1.4764757709251102, "grad_norm": 1.683605461123042, "learning_rate": 3.5241467332908384e-06, "loss": 0.5577334761619568, "step": 4190 }, { "epoch": 1.4768281938325991, "grad_norm": 1.9605228698426533, "learning_rate": 3.5197081601825135e-06, "loss": 0.6596503257751465, "step": 4191 }, { "epoch": 1.477180616740088, "grad_norm": 1.9912955738456768, "learning_rate": 3.5152717869315965e-06, "loss": 0.6260303258895874, "step": 4192 }, { "epoch": 1.477533039647577, "grad_norm": 2.010278739994815, "learning_rate": 3.510837615044097e-06, "loss": 0.5467355251312256, "step": 4193 }, { "epoch": 1.4778854625550661, "grad_norm": 2.516516212561754, "learning_rate": 3.506405646025276e-06, "loss": 0.5306693911552429, "step": 4194 }, { "epoch": 1.478237885462555, "grad_norm": 1.7497505820795882, "learning_rate": 3.5019758813796513e-06, "loss": 0.6130725741386414, "step": 4195 }, { "epoch": 1.478590308370044, "grad_norm": 2.2199157894914143, "learning_rate": 3.4975483226109874e-06, "loss": 0.6656272411346436, "step": 4196 }, { "epoch": 1.4789427312775332, "grad_norm": 1.8654097488268417, "learning_rate": 3.4931229712223047e-06, "loss": 0.6018439531326294, "step": 4197 }, { "epoch": 1.479295154185022, "grad_norm": 2.0982915779378137, "learning_rate": 3.488699828715871e-06, "loss": 0.6635257005691528, "step": 4198 }, { "epoch": 1.479647577092511, "grad_norm": 1.8412514150393455, "learning_rate": 3.4842788965932038e-06, "loss": 0.5760075449943542, "step": 4199 }, { "epoch": 1.48, "grad_norm": 1.7009881043074442, "learning_rate": 3.4798601763550778e-06, "loss": 0.6951982975006104, "step": 4200 }, { "epoch": 1.480352422907489, "grad_norm": 1.880170776358824, "learning_rate": 3.475443669501508e-06, "loss": 0.6574405431747437, "step": 4201 }, { "epoch": 1.4807048458149779, "grad_norm": 1.8075997179509888, "learning_rate": 3.4710293775317593e-06, "loss": 0.5912263989448547, "step": 4202 }, { "epoch": 1.481057268722467, "grad_norm": 1.7703606198961421, "learning_rate": 3.4666173019443485e-06, "loss": 0.5169661045074463, "step": 4203 }, { "epoch": 1.481409691629956, "grad_norm": 1.6923587460137135, "learning_rate": 3.4622074442370345e-06, "loss": 0.5707660913467407, "step": 4204 }, { "epoch": 1.4817621145374449, "grad_norm": 1.7929036165873167, "learning_rate": 3.4577998059068354e-06, "loss": 0.5856584310531616, "step": 4205 }, { "epoch": 1.482114537444934, "grad_norm": 2.0144464412272636, "learning_rate": 3.4533943884499975e-06, "loss": 0.6306010484695435, "step": 4206 }, { "epoch": 1.482466960352423, "grad_norm": 1.9708292107625427, "learning_rate": 3.4489911933620245e-06, "loss": 0.6177140474319458, "step": 4207 }, { "epoch": 1.482819383259912, "grad_norm": 1.6187910458828605, "learning_rate": 3.4445902221376694e-06, "loss": 0.5527759790420532, "step": 4208 }, { "epoch": 1.4831718061674009, "grad_norm": 1.792049785406371, "learning_rate": 3.440191476270922e-06, "loss": 0.6838431358337402, "step": 4209 }, { "epoch": 1.4835242290748898, "grad_norm": 2.0640892173970933, "learning_rate": 3.4357949572550196e-06, "loss": 0.4876987636089325, "step": 4210 }, { "epoch": 1.4838766519823787, "grad_norm": 1.66358265635652, "learning_rate": 3.4314006665824427e-06, "loss": 0.5639296770095825, "step": 4211 }, { "epoch": 1.4842290748898679, "grad_norm": 1.9264745517709694, "learning_rate": 3.427008605744915e-06, "loss": 0.4189109802246094, "step": 4212 }, { "epoch": 1.4845814977973568, "grad_norm": 1.7041726412059042, "learning_rate": 3.422618776233413e-06, "loss": 0.6602882146835327, "step": 4213 }, { "epoch": 1.4849339207048458, "grad_norm": 2.105857994769297, "learning_rate": 3.4182311795381373e-06, "loss": 0.7642478942871094, "step": 4214 }, { "epoch": 1.485286343612335, "grad_norm": 1.703090383184888, "learning_rate": 3.41384581714854e-06, "loss": 0.5550031065940857, "step": 4215 }, { "epoch": 1.4856387665198239, "grad_norm": 1.956165427853548, "learning_rate": 3.4094626905533223e-06, "loss": 0.7036092281341553, "step": 4216 }, { "epoch": 1.4859911894273128, "grad_norm": 1.9055824872661757, "learning_rate": 3.4050818012404165e-06, "loss": 0.693780779838562, "step": 4217 }, { "epoch": 1.4863436123348017, "grad_norm": 1.750544621689218, "learning_rate": 3.4007031506969977e-06, "loss": 0.6315299868583679, "step": 4218 }, { "epoch": 1.4866960352422907, "grad_norm": 2.0036955114247355, "learning_rate": 3.396326740409481e-06, "loss": 0.47849225997924805, "step": 4219 }, { "epoch": 1.4870484581497798, "grad_norm": 1.9433930854735686, "learning_rate": 3.3919525718635195e-06, "loss": 0.6200336217880249, "step": 4220 }, { "epoch": 1.4874008810572688, "grad_norm": 1.7540152253976415, "learning_rate": 3.3875806465440152e-06, "loss": 0.7594903707504272, "step": 4221 }, { "epoch": 1.4877533039647577, "grad_norm": 1.8336468945254887, "learning_rate": 3.383210965935093e-06, "loss": 0.47159409523010254, "step": 4222 }, { "epoch": 1.4881057268722466, "grad_norm": 1.6169320059495966, "learning_rate": 3.3788435315201216e-06, "loss": 0.5272495150566101, "step": 4223 }, { "epoch": 1.4884581497797358, "grad_norm": 1.8268891947791475, "learning_rate": 3.3744783447817177e-06, "loss": 0.43847334384918213, "step": 4224 }, { "epoch": 1.4888105726872247, "grad_norm": 1.7778298915864024, "learning_rate": 3.370115407201724e-06, "loss": 0.656914234161377, "step": 4225 }, { "epoch": 1.4891629955947137, "grad_norm": 1.9329427480605288, "learning_rate": 3.3657547202612128e-06, "loss": 0.6379527449607849, "step": 4226 }, { "epoch": 1.4895154185022026, "grad_norm": 2.0864597290427103, "learning_rate": 3.3613962854405136e-06, "loss": 0.6254120469093323, "step": 4227 }, { "epoch": 1.4898678414096915, "grad_norm": 1.9356514762449182, "learning_rate": 3.35704010421917e-06, "loss": 0.6567566990852356, "step": 4228 }, { "epoch": 1.4902202643171807, "grad_norm": 1.7537675986626187, "learning_rate": 3.352686178075981e-06, "loss": 0.5121499300003052, "step": 4229 }, { "epoch": 1.4905726872246696, "grad_norm": 1.5674669685831402, "learning_rate": 3.3483345084889595e-06, "loss": 0.5727466344833374, "step": 4230 }, { "epoch": 1.4909251101321586, "grad_norm": 2.005473410378335, "learning_rate": 3.3439850969353614e-06, "loss": 0.6013318300247192, "step": 4231 }, { "epoch": 1.4912775330396475, "grad_norm": 1.8391832358116647, "learning_rate": 3.3396379448916836e-06, "loss": 0.6350653767585754, "step": 4232 }, { "epoch": 1.4916299559471367, "grad_norm": 1.8543607360516363, "learning_rate": 3.335293053833645e-06, "loss": 0.7072123885154724, "step": 4233 }, { "epoch": 1.4919823788546256, "grad_norm": 1.7154205163032374, "learning_rate": 3.330950425236201e-06, "loss": 0.6208378076553345, "step": 4234 }, { "epoch": 1.4923348017621145, "grad_norm": 2.0942315596519667, "learning_rate": 3.3266100605735397e-06, "loss": 0.656146764755249, "step": 4235 }, { "epoch": 1.4926872246696035, "grad_norm": 1.953931788636606, "learning_rate": 3.322271961319076e-06, "loss": 0.6145347952842712, "step": 4236 }, { "epoch": 1.4930396475770924, "grad_norm": 1.7528969029549952, "learning_rate": 3.3179361289454694e-06, "loss": 0.5876312255859375, "step": 4237 }, { "epoch": 1.4933920704845816, "grad_norm": 1.601290490679199, "learning_rate": 3.3136025649245897e-06, "loss": 0.48365384340286255, "step": 4238 }, { "epoch": 1.4937444933920705, "grad_norm": 2.189978154300805, "learning_rate": 3.3092712707275467e-06, "loss": 0.566576361656189, "step": 4239 }, { "epoch": 1.4940969162995594, "grad_norm": 1.9878421762040837, "learning_rate": 3.3049422478246886e-06, "loss": 0.6982032060623169, "step": 4240 }, { "epoch": 1.4944493392070486, "grad_norm": 1.9039317781349454, "learning_rate": 3.3006154976855787e-06, "loss": 0.5802686214447021, "step": 4241 }, { "epoch": 1.4948017621145375, "grad_norm": 1.8292762393050834, "learning_rate": 3.296291021779016e-06, "loss": 0.6656724214553833, "step": 4242 }, { "epoch": 1.4951541850220265, "grad_norm": 1.8194685949700777, "learning_rate": 3.2919688215730227e-06, "loss": 0.5081402063369751, "step": 4243 }, { "epoch": 1.4955066079295154, "grad_norm": 1.858251792062496, "learning_rate": 3.28764889853485e-06, "loss": 0.6963785290718079, "step": 4244 }, { "epoch": 1.4958590308370043, "grad_norm": 1.6076782907375928, "learning_rate": 3.283331254130987e-06, "loss": 0.4953869581222534, "step": 4245 }, { "epoch": 1.4962114537444933, "grad_norm": 1.7009256372822803, "learning_rate": 3.2790158898271283e-06, "loss": 0.5495179295539856, "step": 4246 }, { "epoch": 1.4965638766519824, "grad_norm": 1.5282320768079813, "learning_rate": 3.274702807088208e-06, "loss": 0.6238610148429871, "step": 4247 }, { "epoch": 1.4969162995594714, "grad_norm": 1.7145305152154042, "learning_rate": 3.270392007378389e-06, "loss": 0.557083249092102, "step": 4248 }, { "epoch": 1.4972687224669603, "grad_norm": 1.8458971793579602, "learning_rate": 3.2660834921610495e-06, "loss": 0.6317561864852905, "step": 4249 }, { "epoch": 1.4976211453744495, "grad_norm": 2.025909664851984, "learning_rate": 3.2617772628987974e-06, "loss": 0.5957529544830322, "step": 4250 }, { "epoch": 1.4979735682819384, "grad_norm": 1.8950835934769208, "learning_rate": 3.2574733210534637e-06, "loss": 0.7661205530166626, "step": 4251 }, { "epoch": 1.4983259911894273, "grad_norm": 1.6920068443310292, "learning_rate": 3.2531716680861024e-06, "loss": 0.5616782903671265, "step": 4252 }, { "epoch": 1.4986784140969163, "grad_norm": 1.770897830706882, "learning_rate": 3.2488723054569905e-06, "loss": 0.5679990649223328, "step": 4253 }, { "epoch": 1.4990308370044052, "grad_norm": 1.8387315322882807, "learning_rate": 3.2445752346256244e-06, "loss": 0.6355923414230347, "step": 4254 }, { "epoch": 1.4993832599118941, "grad_norm": 1.5734033631685667, "learning_rate": 3.2402804570507316e-06, "loss": 0.5050745010375977, "step": 4255 }, { "epoch": 1.4997356828193833, "grad_norm": 1.9535070131295427, "learning_rate": 3.2359879741902557e-06, "loss": 0.6585286855697632, "step": 4256 }, { "epoch": 1.5000881057268722, "grad_norm": 1.9369843836982625, "learning_rate": 3.2316977875013567e-06, "loss": 0.5108245611190796, "step": 4257 }, { "epoch": 1.5004405286343614, "grad_norm": 1.7460361732263896, "learning_rate": 3.2274098984404223e-06, "loss": 0.5270702838897705, "step": 4258 }, { "epoch": 1.5007929515418503, "grad_norm": 2.09582870313145, "learning_rate": 3.223124308463057e-06, "loss": 0.6421051025390625, "step": 4259 }, { "epoch": 1.5011453744493393, "grad_norm": 2.0173715825527454, "learning_rate": 3.218841019024084e-06, "loss": 0.6040945053100586, "step": 4260 }, { "epoch": 1.5014977973568282, "grad_norm": 3.5488261180155023, "learning_rate": 3.214560031577548e-06, "loss": 0.6389988660812378, "step": 4261 }, { "epoch": 1.5018502202643171, "grad_norm": 2.1449229280338096, "learning_rate": 3.210281347576707e-06, "loss": 0.6474273800849915, "step": 4262 }, { "epoch": 1.502202643171806, "grad_norm": 2.0726789637634666, "learning_rate": 3.206004968474048e-06, "loss": 0.7020560503005981, "step": 4263 }, { "epoch": 1.502555066079295, "grad_norm": 1.7677587583992656, "learning_rate": 3.2017308957212644e-06, "loss": 0.574647068977356, "step": 4264 }, { "epoch": 1.5029074889867842, "grad_norm": 1.8152121117445819, "learning_rate": 3.1974591307692724e-06, "loss": 0.6912944316864014, "step": 4265 }, { "epoch": 1.503259911894273, "grad_norm": 1.7825438750387144, "learning_rate": 3.1931896750682036e-06, "loss": 0.7738592028617859, "step": 4266 }, { "epoch": 1.5036123348017623, "grad_norm": 1.7835054391965142, "learning_rate": 3.188922530067402e-06, "loss": 0.6418012380599976, "step": 4267 }, { "epoch": 1.5039647577092512, "grad_norm": 2.0481798246782628, "learning_rate": 3.1846576972154343e-06, "loss": 0.639055609703064, "step": 4268 }, { "epoch": 1.5043171806167401, "grad_norm": 1.8365579809471801, "learning_rate": 3.1803951779600774e-06, "loss": 0.5512406229972839, "step": 4269 }, { "epoch": 1.504669603524229, "grad_norm": 1.7182877192220278, "learning_rate": 3.1761349737483194e-06, "loss": 0.5838354229927063, "step": 4270 }, { "epoch": 1.505022026431718, "grad_norm": 1.5090233544437164, "learning_rate": 3.1718770860263747e-06, "loss": 0.5903568267822266, "step": 4271 }, { "epoch": 1.505374449339207, "grad_norm": 1.761348463041135, "learning_rate": 3.1676215162396604e-06, "loss": 0.5610073804855347, "step": 4272 }, { "epoch": 1.5057268722466959, "grad_norm": 1.9899291186285208, "learning_rate": 3.163368265832809e-06, "loss": 0.6543136835098267, "step": 4273 }, { "epoch": 1.506079295154185, "grad_norm": 1.9484911821126696, "learning_rate": 3.1591173362496686e-06, "loss": 0.6586440801620483, "step": 4274 }, { "epoch": 1.506431718061674, "grad_norm": 1.7389367867721892, "learning_rate": 3.1548687289332958e-06, "loss": 0.5360713601112366, "step": 4275 }, { "epoch": 1.5067841409691631, "grad_norm": 2.1157677007043243, "learning_rate": 3.1506224453259615e-06, "loss": 0.6695356369018555, "step": 4276 }, { "epoch": 1.507136563876652, "grad_norm": 1.7594436585853632, "learning_rate": 3.146378486869146e-06, "loss": 0.5708016753196716, "step": 4277 }, { "epoch": 1.507488986784141, "grad_norm": 1.997964983412431, "learning_rate": 3.142136855003538e-06, "loss": 0.5412342548370361, "step": 4278 }, { "epoch": 1.50784140969163, "grad_norm": 1.645092688511499, "learning_rate": 3.1378975511690468e-06, "loss": 0.5392874479293823, "step": 4279 }, { "epoch": 1.5081938325991189, "grad_norm": 2.1591157791946256, "learning_rate": 3.133660576804781e-06, "loss": 0.6559237241744995, "step": 4280 }, { "epoch": 1.5085462555066078, "grad_norm": 1.6625372644532221, "learning_rate": 3.1294259333490597e-06, "loss": 0.49973511695861816, "step": 4281 }, { "epoch": 1.5088986784140968, "grad_norm": 1.9292311285357981, "learning_rate": 3.1251936222394152e-06, "loss": 0.5458316206932068, "step": 4282 }, { "epoch": 1.509251101321586, "grad_norm": 1.7771232071244591, "learning_rate": 3.120963644912579e-06, "loss": 0.628986656665802, "step": 4283 }, { "epoch": 1.5096035242290748, "grad_norm": 1.956059007614116, "learning_rate": 3.1167360028045103e-06, "loss": 0.6234235167503357, "step": 4284 }, { "epoch": 1.509955947136564, "grad_norm": 1.832929038299875, "learning_rate": 3.112510697350348e-06, "loss": 0.49892476201057434, "step": 4285 }, { "epoch": 1.510308370044053, "grad_norm": 1.859590789761001, "learning_rate": 3.1082877299844562e-06, "loss": 0.42951709032058716, "step": 4286 }, { "epoch": 1.5106607929515419, "grad_norm": 1.9658176092994974, "learning_rate": 3.1040671021404045e-06, "loss": 0.6392263770103455, "step": 4287 }, { "epoch": 1.5110132158590308, "grad_norm": 1.9240075529588605, "learning_rate": 3.099848815250964e-06, "loss": 0.6198933124542236, "step": 4288 }, { "epoch": 1.5113656387665197, "grad_norm": 2.6550374581713436, "learning_rate": 3.0956328707481055e-06, "loss": 0.7626048922538757, "step": 4289 }, { "epoch": 1.5117180616740087, "grad_norm": 2.057470135822257, "learning_rate": 3.0914192700630175e-06, "loss": 0.5245747566223145, "step": 4290 }, { "epoch": 1.5120704845814978, "grad_norm": 2.016409834872785, "learning_rate": 3.0872080146260818e-06, "loss": 0.6788556575775146, "step": 4291 }, { "epoch": 1.5124229074889868, "grad_norm": 1.8970717527543317, "learning_rate": 3.082999105866897e-06, "loss": 0.6224241852760315, "step": 4292 }, { "epoch": 1.512775330396476, "grad_norm": 1.8828342237083628, "learning_rate": 3.0787925452142477e-06, "loss": 0.706061840057373, "step": 4293 }, { "epoch": 1.5131277533039649, "grad_norm": 1.8530285911040203, "learning_rate": 3.07458833409613e-06, "loss": 0.7075262665748596, "step": 4294 }, { "epoch": 1.5134801762114538, "grad_norm": 1.8075779914700747, "learning_rate": 3.0703864739397494e-06, "loss": 0.4912101626396179, "step": 4295 }, { "epoch": 1.5138325991189427, "grad_norm": 1.908543179959353, "learning_rate": 3.066186966171507e-06, "loss": 0.6530265808105469, "step": 4296 }, { "epoch": 1.5141850220264317, "grad_norm": 2.12821134565194, "learning_rate": 3.0619898122169946e-06, "loss": 0.4905887246131897, "step": 4297 }, { "epoch": 1.5145374449339206, "grad_norm": 1.626400447189927, "learning_rate": 3.057795013501025e-06, "loss": 0.5025225281715393, "step": 4298 }, { "epoch": 1.5148898678414096, "grad_norm": 1.945132129374327, "learning_rate": 3.0536025714475946e-06, "loss": 0.5769479274749756, "step": 4299 }, { "epoch": 1.5152422907488987, "grad_norm": 2.0281621255217526, "learning_rate": 3.049412487479919e-06, "loss": 0.6275384426116943, "step": 4300 }, { "epoch": 1.5155947136563876, "grad_norm": 1.6860684781531563, "learning_rate": 3.04522476302039e-06, "loss": 0.5555096864700317, "step": 4301 }, { "epoch": 1.5159471365638768, "grad_norm": 1.9260394424858205, "learning_rate": 3.0410393994906096e-06, "loss": 0.5605635643005371, "step": 4302 }, { "epoch": 1.5162995594713657, "grad_norm": 2.127824876873509, "learning_rate": 3.0368563983113864e-06, "loss": 0.6006621718406677, "step": 4303 }, { "epoch": 1.5166519823788547, "grad_norm": 2.012171091410243, "learning_rate": 3.0326757609027147e-06, "loss": 0.5288259983062744, "step": 4304 }, { "epoch": 1.5170044052863436, "grad_norm": 1.7034257525965926, "learning_rate": 3.0284974886837903e-06, "loss": 0.5671676993370056, "step": 4305 }, { "epoch": 1.5173568281938326, "grad_norm": 2.496889571382279, "learning_rate": 3.0243215830730075e-06, "loss": 0.6072134971618652, "step": 4306 }, { "epoch": 1.5177092511013215, "grad_norm": 1.726261889224961, "learning_rate": 3.020148045487953e-06, "loss": 0.6010481119155884, "step": 4307 }, { "epoch": 1.5180616740088104, "grad_norm": 1.6250908189476003, "learning_rate": 3.0159768773454225e-06, "loss": 0.6126751899719238, "step": 4308 }, { "epoch": 1.5184140969162996, "grad_norm": 1.6123380534859018, "learning_rate": 3.011808080061387e-06, "loss": 0.5408819317817688, "step": 4309 }, { "epoch": 1.5187665198237885, "grad_norm": 1.6792977324898095, "learning_rate": 3.0076416550510255e-06, "loss": 0.6528562307357788, "step": 4310 }, { "epoch": 1.5191189427312777, "grad_norm": 1.6431948485087644, "learning_rate": 3.003477603728715e-06, "loss": 0.6355241537094116, "step": 4311 }, { "epoch": 1.5194713656387666, "grad_norm": 1.7630338655444058, "learning_rate": 2.9993159275080174e-06, "loss": 0.5511878728866577, "step": 4312 }, { "epoch": 1.5198237885462555, "grad_norm": 1.9093354982688662, "learning_rate": 2.9951566278016943e-06, "loss": 0.5066816806793213, "step": 4313 }, { "epoch": 1.5201762114537445, "grad_norm": 1.676344611272679, "learning_rate": 2.9909997060216966e-06, "loss": 0.5636533498764038, "step": 4314 }, { "epoch": 1.5205286343612334, "grad_norm": 1.8885420705538216, "learning_rate": 2.9868451635791706e-06, "loss": 0.49742352962493896, "step": 4315 }, { "epoch": 1.5208810572687224, "grad_norm": 2.013877525146858, "learning_rate": 2.9826930018844533e-06, "loss": 0.7264617681503296, "step": 4316 }, { "epoch": 1.5212334801762113, "grad_norm": 1.8792043539230026, "learning_rate": 2.978543222347076e-06, "loss": 0.5342350006103516, "step": 4317 }, { "epoch": 1.5215859030837005, "grad_norm": 1.7569176385310192, "learning_rate": 2.9743958263757554e-06, "loss": 0.4324883818626404, "step": 4318 }, { "epoch": 1.5219383259911894, "grad_norm": 1.8546496052344164, "learning_rate": 2.970250815378409e-06, "loss": 0.5867510437965393, "step": 4319 }, { "epoch": 1.5222907488986785, "grad_norm": 1.8415605839915816, "learning_rate": 2.966108190762138e-06, "loss": 0.7176594734191895, "step": 4320 }, { "epoch": 1.5226431718061675, "grad_norm": 1.967906535494615, "learning_rate": 2.9619679539332337e-06, "loss": 0.5810995101928711, "step": 4321 }, { "epoch": 1.5229955947136564, "grad_norm": 2.478705006420029, "learning_rate": 2.957830106297177e-06, "loss": 0.6262675523757935, "step": 4322 }, { "epoch": 1.5233480176211454, "grad_norm": 2.1743670559442245, "learning_rate": 2.9536946492586383e-06, "loss": 0.7743325233459473, "step": 4323 }, { "epoch": 1.5237004405286343, "grad_norm": 1.7129659102014092, "learning_rate": 2.9495615842214776e-06, "loss": 0.7706553936004639, "step": 4324 }, { "epoch": 1.5240528634361232, "grad_norm": 1.6835245148440698, "learning_rate": 2.9454309125887405e-06, "loss": 0.5982425808906555, "step": 4325 }, { "epoch": 1.5244052863436124, "grad_norm": 1.8547174799711497, "learning_rate": 2.9413026357626596e-06, "loss": 0.5580830574035645, "step": 4326 }, { "epoch": 1.5247577092511013, "grad_norm": 1.8954299514318398, "learning_rate": 2.937176755144662e-06, "loss": 0.5316063165664673, "step": 4327 }, { "epoch": 1.5251101321585903, "grad_norm": 1.7578719545795178, "learning_rate": 2.9330532721353523e-06, "loss": 0.574161171913147, "step": 4328 }, { "epoch": 1.5254625550660794, "grad_norm": 1.7055567103896054, "learning_rate": 2.9289321881345257e-06, "loss": 0.5339558720588684, "step": 4329 }, { "epoch": 1.5258149779735684, "grad_norm": 1.7071106155323514, "learning_rate": 2.9248135045411607e-06, "loss": 0.594109296798706, "step": 4330 }, { "epoch": 1.5261674008810573, "grad_norm": 2.0022142230843873, "learning_rate": 2.9206972227534237e-06, "loss": 0.5953024625778198, "step": 4331 }, { "epoch": 1.5265198237885462, "grad_norm": 1.6369885387081085, "learning_rate": 2.916583344168663e-06, "loss": 0.5142296552658081, "step": 4332 }, { "epoch": 1.5268722466960352, "grad_norm": 1.7205930689665365, "learning_rate": 2.912471870183411e-06, "loss": 0.5796314477920532, "step": 4333 }, { "epoch": 1.527224669603524, "grad_norm": 1.984086822092815, "learning_rate": 2.9083628021933886e-06, "loss": 0.7202566862106323, "step": 4334 }, { "epoch": 1.5275770925110133, "grad_norm": 2.2205082372485756, "learning_rate": 2.9042561415934956e-06, "loss": 0.6684188842773438, "step": 4335 }, { "epoch": 1.5279295154185022, "grad_norm": 1.7000543577524454, "learning_rate": 2.9001518897778147e-06, "loss": 0.5377634763717651, "step": 4336 }, { "epoch": 1.5282819383259914, "grad_norm": 1.7985805373418047, "learning_rate": 2.8960500481396115e-06, "loss": 0.5780486464500427, "step": 4337 }, { "epoch": 1.5286343612334803, "grad_norm": 1.7528900086241466, "learning_rate": 2.891950618071333e-06, "loss": 0.6020476818084717, "step": 4338 }, { "epoch": 1.5289867841409692, "grad_norm": 2.1939247460310303, "learning_rate": 2.8878536009646106e-06, "loss": 0.6076337099075317, "step": 4339 }, { "epoch": 1.5293392070484582, "grad_norm": 1.9795227787355654, "learning_rate": 2.883758998210251e-06, "loss": 0.6370673179626465, "step": 4340 }, { "epoch": 1.529691629955947, "grad_norm": 1.8686242611734982, "learning_rate": 2.879666811198244e-06, "loss": 0.41594892740249634, "step": 4341 }, { "epoch": 1.530044052863436, "grad_norm": 1.9135398095116771, "learning_rate": 2.8755770413177632e-06, "loss": 0.4506857693195343, "step": 4342 }, { "epoch": 1.530396475770925, "grad_norm": 1.730478313082556, "learning_rate": 2.8714896899571575e-06, "loss": 0.5883188247680664, "step": 4343 }, { "epoch": 1.5307488986784141, "grad_norm": 1.9672614741507624, "learning_rate": 2.8674047585039545e-06, "loss": 0.6327757239341736, "step": 4344 }, { "epoch": 1.531101321585903, "grad_norm": 1.958310227558085, "learning_rate": 2.863322248344862e-06, "loss": 0.6241307258605957, "step": 4345 }, { "epoch": 1.5314537444933922, "grad_norm": 1.9476460174005816, "learning_rate": 2.859242160865764e-06, "loss": 0.6982603669166565, "step": 4346 }, { "epoch": 1.5318061674008812, "grad_norm": 1.8365437525119523, "learning_rate": 2.8551644974517236e-06, "loss": 0.6293624639511108, "step": 4347 }, { "epoch": 1.53215859030837, "grad_norm": 1.8744498682554205, "learning_rate": 2.85108925948698e-06, "loss": 0.5630898475646973, "step": 4348 }, { "epoch": 1.532511013215859, "grad_norm": 1.9156047998547734, "learning_rate": 2.847016448354948e-06, "loss": 0.5300726294517517, "step": 4349 }, { "epoch": 1.532863436123348, "grad_norm": 1.763805411540431, "learning_rate": 2.8429460654382257e-06, "loss": 0.6302311420440674, "step": 4350 }, { "epoch": 1.533215859030837, "grad_norm": 2.0169994553834467, "learning_rate": 2.8388781121185815e-06, "loss": 0.5063371658325195, "step": 4351 }, { "epoch": 1.5335682819383258, "grad_norm": 1.995201540637565, "learning_rate": 2.8348125897769496e-06, "loss": 0.6116877198219299, "step": 4352 }, { "epoch": 1.533920704845815, "grad_norm": 1.7792408625607217, "learning_rate": 2.830749499793458e-06, "loss": 0.5671982169151306, "step": 4353 }, { "epoch": 1.534273127753304, "grad_norm": 1.927509688688397, "learning_rate": 2.826688843547395e-06, "loss": 0.6537752747535706, "step": 4354 }, { "epoch": 1.534625550660793, "grad_norm": 2.0558781996543805, "learning_rate": 2.8226306224172283e-06, "loss": 0.6608545780181885, "step": 4355 }, { "epoch": 1.534977973568282, "grad_norm": 2.2266474146630655, "learning_rate": 2.8185748377805977e-06, "loss": 0.7038587331771851, "step": 4356 }, { "epoch": 1.535330396475771, "grad_norm": 2.084521400671401, "learning_rate": 2.8145214910143128e-06, "loss": 0.7422336339950562, "step": 4357 }, { "epoch": 1.53568281938326, "grad_norm": 1.7545985325343467, "learning_rate": 2.8104705834943625e-06, "loss": 0.5739270448684692, "step": 4358 }, { "epoch": 1.5360352422907488, "grad_norm": 1.8063052751864486, "learning_rate": 2.8064221165959073e-06, "loss": 0.6429908275604248, "step": 4359 }, { "epoch": 1.5363876651982378, "grad_norm": 2.071223390835828, "learning_rate": 2.802376091693264e-06, "loss": 0.5660578012466431, "step": 4360 }, { "epoch": 1.5367400881057267, "grad_norm": 1.7313945668822706, "learning_rate": 2.798332510159942e-06, "loss": 0.4507398009300232, "step": 4361 }, { "epoch": 1.5370925110132159, "grad_norm": 1.638677595892734, "learning_rate": 2.7942913733686063e-06, "loss": 0.5107634663581848, "step": 4362 }, { "epoch": 1.5374449339207048, "grad_norm": 1.8351099655043759, "learning_rate": 2.790252682691106e-06, "loss": 0.505529522895813, "step": 4363 }, { "epoch": 1.537797356828194, "grad_norm": 1.7171378177734038, "learning_rate": 2.7862164394984405e-06, "loss": 0.459098219871521, "step": 4364 }, { "epoch": 1.538149779735683, "grad_norm": 1.6256824509042396, "learning_rate": 2.782182645160789e-06, "loss": 0.5200169086456299, "step": 4365 }, { "epoch": 1.5385022026431718, "grad_norm": 2.1593491644665908, "learning_rate": 2.778151301047506e-06, "loss": 0.6723796725273132, "step": 4366 }, { "epoch": 1.5388546255506608, "grad_norm": 1.7552269414614434, "learning_rate": 2.7741224085271067e-06, "loss": 0.5385584831237793, "step": 4367 }, { "epoch": 1.5392070484581497, "grad_norm": 1.6358174263890735, "learning_rate": 2.770095968967267e-06, "loss": 0.5766934156417847, "step": 4368 }, { "epoch": 1.5395594713656386, "grad_norm": 1.6116842273066272, "learning_rate": 2.766071983734845e-06, "loss": 0.6303011178970337, "step": 4369 }, { "epoch": 1.5399118942731278, "grad_norm": 2.0294439046284736, "learning_rate": 2.7620504541958525e-06, "loss": 0.6192827224731445, "step": 4370 }, { "epoch": 1.5402643171806167, "grad_norm": 1.9731545400175885, "learning_rate": 2.758031381715485e-06, "loss": 0.543215811252594, "step": 4371 }, { "epoch": 1.5406167400881057, "grad_norm": 1.8102023756492311, "learning_rate": 2.7540147676580808e-06, "loss": 0.6364312171936035, "step": 4372 }, { "epoch": 1.5409691629955948, "grad_norm": 1.7440307883728075, "learning_rate": 2.750000613387157e-06, "loss": 0.5625254511833191, "step": 4373 }, { "epoch": 1.5413215859030838, "grad_norm": 2.1646055145888377, "learning_rate": 2.7459889202654e-06, "loss": 0.7304128408432007, "step": 4374 }, { "epoch": 1.5416740088105727, "grad_norm": 2.1627384337401296, "learning_rate": 2.7419796896546536e-06, "loss": 0.676097571849823, "step": 4375 }, { "epoch": 1.5420264317180616, "grad_norm": 1.9373952441867042, "learning_rate": 2.7379729229159193e-06, "loss": 0.7024539709091187, "step": 4376 }, { "epoch": 1.5423788546255506, "grad_norm": 1.6778176206961017, "learning_rate": 2.7339686214093774e-06, "loss": 0.6357964277267456, "step": 4377 }, { "epoch": 1.5427312775330395, "grad_norm": 1.8606991682829432, "learning_rate": 2.729966786494361e-06, "loss": 0.5254555940628052, "step": 4378 }, { "epoch": 1.5430837004405287, "grad_norm": 1.527570009912515, "learning_rate": 2.7259674195293697e-06, "loss": 0.4899883270263672, "step": 4379 }, { "epoch": 1.5434361233480176, "grad_norm": 1.725531709071361, "learning_rate": 2.721970521872063e-06, "loss": 0.5750056505203247, "step": 4380 }, { "epoch": 1.5437885462555068, "grad_norm": 1.8900737960638598, "learning_rate": 2.71797609487926e-06, "loss": 0.5852059125900269, "step": 4381 }, { "epoch": 1.5441409691629957, "grad_norm": 1.8258629839457563, "learning_rate": 2.71398413990695e-06, "loss": 0.6360914707183838, "step": 4382 }, { "epoch": 1.5444933920704846, "grad_norm": 1.7586915096989222, "learning_rate": 2.7099946583102764e-06, "loss": 0.5120062828063965, "step": 4383 }, { "epoch": 1.5448458149779736, "grad_norm": 2.068877348919367, "learning_rate": 2.706007651443543e-06, "loss": 0.5798901319503784, "step": 4384 }, { "epoch": 1.5451982378854625, "grad_norm": 2.014366735127449, "learning_rate": 2.702023120660213e-06, "loss": 0.5112065076828003, "step": 4385 }, { "epoch": 1.5455506607929514, "grad_norm": 1.9281653354114374, "learning_rate": 2.6980410673129133e-06, "loss": 0.6136611700057983, "step": 4386 }, { "epoch": 1.5459030837004404, "grad_norm": 1.6841076662412324, "learning_rate": 2.694061492753426e-06, "loss": 0.5944457054138184, "step": 4387 }, { "epoch": 1.5462555066079295, "grad_norm": 1.9404009079173157, "learning_rate": 2.690084398332692e-06, "loss": 0.5931667685508728, "step": 4388 }, { "epoch": 1.5466079295154185, "grad_norm": 1.607840859056915, "learning_rate": 2.686109785400809e-06, "loss": 0.6112217307090759, "step": 4389 }, { "epoch": 1.5469603524229076, "grad_norm": 1.9090904865448288, "learning_rate": 2.68213765530704e-06, "loss": 0.549437940120697, "step": 4390 }, { "epoch": 1.5473127753303966, "grad_norm": 1.9826888565576624, "learning_rate": 2.6781680093997965e-06, "loss": 0.674758791923523, "step": 4391 }, { "epoch": 1.5476651982378855, "grad_norm": 1.874293916028551, "learning_rate": 2.6742008490266504e-06, "loss": 0.6015446186065674, "step": 4392 }, { "epoch": 1.5480176211453744, "grad_norm": 2.2556212033260223, "learning_rate": 2.6702361755343278e-06, "loss": 0.5512514710426331, "step": 4393 }, { "epoch": 1.5483700440528634, "grad_norm": 2.607818594949077, "learning_rate": 2.666273990268713e-06, "loss": 0.6443158984184265, "step": 4394 }, { "epoch": 1.5487224669603523, "grad_norm": 1.840692212890546, "learning_rate": 2.6623142945748447e-06, "loss": 0.5682512521743774, "step": 4395 }, { "epoch": 1.5490748898678413, "grad_norm": 2.173690182254911, "learning_rate": 2.658357089796917e-06, "loss": 0.5544074773788452, "step": 4396 }, { "epoch": 1.5494273127753304, "grad_norm": 1.726846155573174, "learning_rate": 2.6544023772782736e-06, "loss": 0.5811636447906494, "step": 4397 }, { "epoch": 1.5497797356828193, "grad_norm": 1.790573455353959, "learning_rate": 2.650450158361422e-06, "loss": 0.4696553647518158, "step": 4398 }, { "epoch": 1.5501321585903085, "grad_norm": 1.918353319441468, "learning_rate": 2.6465004343880153e-06, "loss": 0.6897521615028381, "step": 4399 }, { "epoch": 1.5504845814977974, "grad_norm": 1.9780672696205217, "learning_rate": 2.6425532066988613e-06, "loss": 0.6154924631118774, "step": 4400 }, { "epoch": 1.5508370044052864, "grad_norm": 2.0803038103367815, "learning_rate": 2.6386084766339214e-06, "loss": 0.5333596467971802, "step": 4401 }, { "epoch": 1.5511894273127753, "grad_norm": 1.8190921046801005, "learning_rate": 2.634666245532309e-06, "loss": 0.6633985042572021, "step": 4402 }, { "epoch": 1.5515418502202643, "grad_norm": 1.6722563074159322, "learning_rate": 2.630726514732289e-06, "loss": 0.7913509607315063, "step": 4403 }, { "epoch": 1.5518942731277532, "grad_norm": 1.8118598393520884, "learning_rate": 2.6267892855712763e-06, "loss": 0.5776455402374268, "step": 4404 }, { "epoch": 1.5522466960352423, "grad_norm": 1.68862603841886, "learning_rate": 2.6228545593858357e-06, "loss": 0.5912357568740845, "step": 4405 }, { "epoch": 1.5525991189427313, "grad_norm": 2.1281645633634274, "learning_rate": 2.618922337511689e-06, "loss": 0.49319127202033997, "step": 4406 }, { "epoch": 1.5529515418502202, "grad_norm": 1.9165140700223777, "learning_rate": 2.6149926212837016e-06, "loss": 0.5805023908615112, "step": 4407 }, { "epoch": 1.5533039647577094, "grad_norm": 1.8889646736612442, "learning_rate": 2.6110654120358902e-06, "loss": 0.5635806918144226, "step": 4408 }, { "epoch": 1.5536563876651983, "grad_norm": 1.6927734818193383, "learning_rate": 2.6071407111014178e-06, "loss": 0.5006709694862366, "step": 4409 }, { "epoch": 1.5540088105726872, "grad_norm": 2.1152391301202695, "learning_rate": 2.6032185198126005e-06, "loss": 0.6035311818122864, "step": 4410 }, { "epoch": 1.5543612334801762, "grad_norm": 1.7863466908276826, "learning_rate": 2.599298839500899e-06, "loss": 0.5978977680206299, "step": 4411 }, { "epoch": 1.5547136563876651, "grad_norm": 1.8741259739913476, "learning_rate": 2.5953816714969194e-06, "loss": 0.6330617070198059, "step": 4412 }, { "epoch": 1.555066079295154, "grad_norm": 2.090756152665107, "learning_rate": 2.591467017130426e-06, "loss": 0.6541750431060791, "step": 4413 }, { "epoch": 1.5554185022026432, "grad_norm": 2.229850729984303, "learning_rate": 2.5875548777303204e-06, "loss": 0.5503655076026917, "step": 4414 }, { "epoch": 1.5557709251101322, "grad_norm": 1.7715926792210983, "learning_rate": 2.583645254624645e-06, "loss": 0.5117509365081787, "step": 4415 }, { "epoch": 1.5561233480176213, "grad_norm": 2.2945620883910953, "learning_rate": 2.5797381491406027e-06, "loss": 0.6699894070625305, "step": 4416 }, { "epoch": 1.5564757709251102, "grad_norm": 2.0695182526571765, "learning_rate": 2.5758335626045308e-06, "loss": 0.6870071291923523, "step": 4417 }, { "epoch": 1.5568281938325992, "grad_norm": 1.7821456199762375, "learning_rate": 2.571931496341916e-06, "loss": 0.7680954933166504, "step": 4418 }, { "epoch": 1.5571806167400881, "grad_norm": 2.2345593449058203, "learning_rate": 2.568031951677389e-06, "loss": 0.6504727602005005, "step": 4419 }, { "epoch": 1.557533039647577, "grad_norm": 1.6796846625470907, "learning_rate": 2.5641349299347196e-06, "loss": 0.7101249098777771, "step": 4420 }, { "epoch": 1.557885462555066, "grad_norm": 1.6800594114237326, "learning_rate": 2.560240432436831e-06, "loss": 0.5734864473342896, "step": 4421 }, { "epoch": 1.558237885462555, "grad_norm": 2.065356657851052, "learning_rate": 2.5563484605057854e-06, "loss": 0.48660311102867126, "step": 4422 }, { "epoch": 1.558590308370044, "grad_norm": 2.0678483817870847, "learning_rate": 2.552459015462776e-06, "loss": 0.6442986726760864, "step": 4423 }, { "epoch": 1.558942731277533, "grad_norm": 1.721146259770593, "learning_rate": 2.548572098628158e-06, "loss": 0.5871995091438293, "step": 4424 }, { "epoch": 1.5592951541850222, "grad_norm": 1.8900651182173844, "learning_rate": 2.544687711321415e-06, "loss": 0.5899579524993896, "step": 4425 }, { "epoch": 1.5596475770925111, "grad_norm": 1.589021195095579, "learning_rate": 2.540805854861177e-06, "loss": 0.571341872215271, "step": 4426 }, { "epoch": 1.56, "grad_norm": 2.074587814486514, "learning_rate": 2.5369265305652112e-06, "loss": 0.6297308206558228, "step": 4427 }, { "epoch": 1.560352422907489, "grad_norm": 1.957815284803115, "learning_rate": 2.5330497397504274e-06, "loss": 0.6277692317962646, "step": 4428 }, { "epoch": 1.560704845814978, "grad_norm": 1.8075270549654299, "learning_rate": 2.5291754837328787e-06, "loss": 0.5124595165252686, "step": 4429 }, { "epoch": 1.5610572687224669, "grad_norm": 2.027466093132035, "learning_rate": 2.5253037638277557e-06, "loss": 0.6777669191360474, "step": 4430 }, { "epoch": 1.5614096916299558, "grad_norm": 2.299371691906574, "learning_rate": 2.521434581349378e-06, "loss": 0.7380247116088867, "step": 4431 }, { "epoch": 1.561762114537445, "grad_norm": 2.0566157739817825, "learning_rate": 2.5175679376112206e-06, "loss": 0.6605849266052246, "step": 4432 }, { "epoch": 1.562114537444934, "grad_norm": 1.7899790415054606, "learning_rate": 2.5137038339258837e-06, "loss": 0.5688329935073853, "step": 4433 }, { "epoch": 1.562466960352423, "grad_norm": 2.1227992795896258, "learning_rate": 2.5098422716051197e-06, "loss": 0.6731508374214172, "step": 4434 }, { "epoch": 1.562819383259912, "grad_norm": 1.766889438914358, "learning_rate": 2.505983251959798e-06, "loss": 0.5177330374717712, "step": 4435 }, { "epoch": 1.563171806167401, "grad_norm": 1.793841264632356, "learning_rate": 2.502126776299938e-06, "loss": 0.5307918787002563, "step": 4436 }, { "epoch": 1.5635242290748899, "grad_norm": 1.8402321267228738, "learning_rate": 2.4982728459346974e-06, "loss": 0.59647536277771, "step": 4437 }, { "epoch": 1.5638766519823788, "grad_norm": 2.049156650890273, "learning_rate": 2.494421462172365e-06, "loss": 0.6215553283691406, "step": 4438 }, { "epoch": 1.5642290748898677, "grad_norm": 1.7976631043220852, "learning_rate": 2.490572626320359e-06, "loss": 0.49461615085601807, "step": 4439 }, { "epoch": 1.5645814977973567, "grad_norm": 2.4138380625358757, "learning_rate": 2.486726339685247e-06, "loss": 0.6625338196754456, "step": 4440 }, { "epoch": 1.5649339207048458, "grad_norm": 1.5979739892152505, "learning_rate": 2.4828826035727214e-06, "loss": 0.4059983193874359, "step": 4441 }, { "epoch": 1.5652863436123348, "grad_norm": 1.7298713789472393, "learning_rate": 2.47904141928761e-06, "loss": 0.6234895586967468, "step": 4442 }, { "epoch": 1.565638766519824, "grad_norm": 1.8282339040044808, "learning_rate": 2.4752027881338757e-06, "loss": 0.513421893119812, "step": 4443 }, { "epoch": 1.5659911894273129, "grad_norm": 2.0213648562049693, "learning_rate": 2.4713667114146123e-06, "loss": 0.6168510913848877, "step": 4444 }, { "epoch": 1.5663436123348018, "grad_norm": 1.8904853102151467, "learning_rate": 2.4675331904320533e-06, "loss": 0.5474672317504883, "step": 4445 }, { "epoch": 1.5666960352422907, "grad_norm": 2.020157324166176, "learning_rate": 2.46370222648756e-06, "loss": 0.7464281916618347, "step": 4446 }, { "epoch": 1.5670484581497797, "grad_norm": 1.8187430699226648, "learning_rate": 2.4598738208816155e-06, "loss": 0.5890274047851562, "step": 4447 }, { "epoch": 1.5674008810572686, "grad_norm": 2.0160604417207293, "learning_rate": 2.4560479749138554e-06, "loss": 0.7577700018882751, "step": 4448 }, { "epoch": 1.5677533039647578, "grad_norm": 1.6711759350664435, "learning_rate": 2.4522246898830302e-06, "loss": 0.5374037027359009, "step": 4449 }, { "epoch": 1.5681057268722467, "grad_norm": 1.7947512315133625, "learning_rate": 2.4484039670870286e-06, "loss": 0.44840407371520996, "step": 4450 }, { "epoch": 1.5684581497797356, "grad_norm": 1.8087906354095658, "learning_rate": 2.4445858078228647e-06, "loss": 0.5144427418708801, "step": 4451 }, { "epoch": 1.5688105726872248, "grad_norm": 1.7889124821216469, "learning_rate": 2.440770213386684e-06, "loss": 0.39119952917099, "step": 4452 }, { "epoch": 1.5691629955947137, "grad_norm": 1.6376212389282347, "learning_rate": 2.436957185073766e-06, "loss": 0.5287434458732605, "step": 4453 }, { "epoch": 1.5695154185022027, "grad_norm": 2.2578778571267315, "learning_rate": 2.4331467241785157e-06, "loss": 0.568587064743042, "step": 4454 }, { "epoch": 1.5698678414096916, "grad_norm": 3.2977149916111608, "learning_rate": 2.429338831994458e-06, "loss": 0.5522792339324951, "step": 4455 }, { "epoch": 1.5702202643171805, "grad_norm": 1.7594156491061212, "learning_rate": 2.425533509814262e-06, "loss": 0.48070845007896423, "step": 4456 }, { "epoch": 1.5705726872246695, "grad_norm": 1.631888097687176, "learning_rate": 2.4217307589297135e-06, "loss": 0.44293439388275146, "step": 4457 }, { "epoch": 1.5709251101321586, "grad_norm": 1.933449446432769, "learning_rate": 2.4179305806317266e-06, "loss": 0.5753301382064819, "step": 4458 }, { "epoch": 1.5712775330396476, "grad_norm": 1.9958241636570169, "learning_rate": 2.414132976210346e-06, "loss": 0.5873000025749207, "step": 4459 }, { "epoch": 1.5716299559471367, "grad_norm": 2.2068877987049955, "learning_rate": 2.410337946954736e-06, "loss": 0.6084823608398438, "step": 4460 }, { "epoch": 1.5719823788546257, "grad_norm": 1.743876311662913, "learning_rate": 2.4065454941531963e-06, "loss": 0.541124165058136, "step": 4461 }, { "epoch": 1.5723348017621146, "grad_norm": 1.8080812306830252, "learning_rate": 2.4027556190931446e-06, "loss": 0.5170080661773682, "step": 4462 }, { "epoch": 1.5726872246696035, "grad_norm": 1.817245899938438, "learning_rate": 2.398968323061125e-06, "loss": 0.5613514184951782, "step": 4463 }, { "epoch": 1.5730396475770925, "grad_norm": 1.7097401781842303, "learning_rate": 2.395183607342807e-06, "loss": 0.6645728349685669, "step": 4464 }, { "epoch": 1.5733920704845814, "grad_norm": 1.8730205237982336, "learning_rate": 2.391401473222983e-06, "loss": 0.7077093124389648, "step": 4465 }, { "epoch": 1.5737444933920703, "grad_norm": 1.7460518248753176, "learning_rate": 2.387621921985571e-06, "loss": 0.5687523484230042, "step": 4466 }, { "epoch": 1.5740969162995595, "grad_norm": 1.9850945169232843, "learning_rate": 2.38384495491361e-06, "loss": 0.5837362408638, "step": 4467 }, { "epoch": 1.5744493392070484, "grad_norm": 2.051593268912329, "learning_rate": 2.3800705732892615e-06, "loss": 0.5552037358283997, "step": 4468 }, { "epoch": 1.5748017621145376, "grad_norm": 1.8128967121473578, "learning_rate": 2.376298778393814e-06, "loss": 0.5502952337265015, "step": 4469 }, { "epoch": 1.5751541850220265, "grad_norm": 1.958629504700592, "learning_rate": 2.3725295715076734e-06, "loss": 0.5621509552001953, "step": 4470 }, { "epoch": 1.5755066079295155, "grad_norm": 2.20917213599842, "learning_rate": 2.3687629539103676e-06, "loss": 0.6703782081604004, "step": 4471 }, { "epoch": 1.5758590308370044, "grad_norm": 1.6659443121840707, "learning_rate": 2.3649989268805453e-06, "loss": 0.5681235194206238, "step": 4472 }, { "epoch": 1.5762114537444933, "grad_norm": 1.6009126465101926, "learning_rate": 2.361237491695978e-06, "loss": 0.611667811870575, "step": 4473 }, { "epoch": 1.5765638766519823, "grad_norm": 1.7200740539010873, "learning_rate": 2.3574786496335546e-06, "loss": 0.5758671760559082, "step": 4474 }, { "epoch": 1.5769162995594712, "grad_norm": 1.9125579541010735, "learning_rate": 2.3537224019692863e-06, "loss": 0.4865596294403076, "step": 4475 }, { "epoch": 1.5772687224669604, "grad_norm": 1.8564502689111453, "learning_rate": 2.3499687499782976e-06, "loss": 0.6356204152107239, "step": 4476 }, { "epoch": 1.5776211453744493, "grad_norm": 2.1421860610476022, "learning_rate": 2.346217694934847e-06, "loss": 0.7177166938781738, "step": 4477 }, { "epoch": 1.5779735682819385, "grad_norm": 1.5480906826266605, "learning_rate": 2.3424692381122882e-06, "loss": 0.5727916955947876, "step": 4478 }, { "epoch": 1.5783259911894274, "grad_norm": 1.8719733775312895, "learning_rate": 2.3387233807831144e-06, "loss": 0.4904511570930481, "step": 4479 }, { "epoch": 1.5786784140969163, "grad_norm": 1.781780296857209, "learning_rate": 2.3349801242189262e-06, "loss": 0.6029622554779053, "step": 4480 }, { "epoch": 1.5790308370044053, "grad_norm": 1.7377028122196188, "learning_rate": 2.3312394696904404e-06, "loss": 0.6462864875793457, "step": 4481 }, { "epoch": 1.5793832599118942, "grad_norm": 2.2050402923740555, "learning_rate": 2.327501418467495e-06, "loss": 0.6000367403030396, "step": 4482 }, { "epoch": 1.5797356828193831, "grad_norm": 1.8056795992302546, "learning_rate": 2.3237659718190398e-06, "loss": 0.5498829483985901, "step": 4483 }, { "epoch": 1.580088105726872, "grad_norm": 1.9193344841770834, "learning_rate": 2.320033131013142e-06, "loss": 0.5445006489753723, "step": 4484 }, { "epoch": 1.5804405286343612, "grad_norm": 1.737360484366453, "learning_rate": 2.316302897316992e-06, "loss": 0.4878338575363159, "step": 4485 }, { "epoch": 1.5807929515418502, "grad_norm": 1.9395301127212525, "learning_rate": 2.3125752719968763e-06, "loss": 0.473583459854126, "step": 4486 }, { "epoch": 1.5811453744493393, "grad_norm": 1.885736275905952, "learning_rate": 2.308850256318218e-06, "loss": 0.6530570983886719, "step": 4487 }, { "epoch": 1.5814977973568283, "grad_norm": 1.9957270393411881, "learning_rate": 2.30512785154554e-06, "loss": 0.6925215721130371, "step": 4488 }, { "epoch": 1.5818502202643172, "grad_norm": 2.319012517660613, "learning_rate": 2.3014080589424837e-06, "loss": 0.6210705637931824, "step": 4489 }, { "epoch": 1.5822026431718061, "grad_norm": 1.9814470349632005, "learning_rate": 2.2976908797718013e-06, "loss": 0.5843231678009033, "step": 4490 }, { "epoch": 1.582555066079295, "grad_norm": 1.8411432529202023, "learning_rate": 2.2939763152953576e-06, "loss": 0.7014307379722595, "step": 4491 }, { "epoch": 1.582907488986784, "grad_norm": 2.432500927945977, "learning_rate": 2.2902643667741386e-06, "loss": 0.563744843006134, "step": 4492 }, { "epoch": 1.5832599118942732, "grad_norm": 2.0467865020897227, "learning_rate": 2.286555035468233e-06, "loss": 0.6067275404930115, "step": 4493 }, { "epoch": 1.5836123348017621, "grad_norm": 1.4471777617782167, "learning_rate": 2.282848322636836e-06, "loss": 0.5471328496932983, "step": 4494 }, { "epoch": 1.583964757709251, "grad_norm": 1.8188988721843682, "learning_rate": 2.2791442295382693e-06, "loss": 0.4994550943374634, "step": 4495 }, { "epoch": 1.5843171806167402, "grad_norm": 1.9672025899108128, "learning_rate": 2.275442757429954e-06, "loss": 0.6064262390136719, "step": 4496 }, { "epoch": 1.5846696035242291, "grad_norm": 1.8109350365291292, "learning_rate": 2.2717439075684268e-06, "loss": 0.5119039416313171, "step": 4497 }, { "epoch": 1.585022026431718, "grad_norm": 2.2031235285356883, "learning_rate": 2.26804768120933e-06, "loss": 0.7276502251625061, "step": 4498 }, { "epoch": 1.585374449339207, "grad_norm": 2.0480046358265827, "learning_rate": 2.264354079607416e-06, "loss": 0.6175409555435181, "step": 4499 }, { "epoch": 1.585726872246696, "grad_norm": 2.165546737643913, "learning_rate": 2.2606631040165517e-06, "loss": 0.6289592981338501, "step": 4500 }, { "epoch": 1.5860792951541849, "grad_norm": 1.626913781336784, "learning_rate": 2.2569747556897103e-06, "loss": 0.5802761316299438, "step": 4501 }, { "epoch": 1.586431718061674, "grad_norm": 1.6717876401169283, "learning_rate": 2.2532890358789604e-06, "loss": 0.5883978605270386, "step": 4502 }, { "epoch": 1.586784140969163, "grad_norm": 1.7433478934489002, "learning_rate": 2.2496059458355e-06, "loss": 0.6915061473846436, "step": 4503 }, { "epoch": 1.5871365638766521, "grad_norm": 1.7904879000491816, "learning_rate": 2.2459254868096194e-06, "loss": 0.6255539655685425, "step": 4504 }, { "epoch": 1.587488986784141, "grad_norm": 2.0290072373401706, "learning_rate": 2.2422476600507203e-06, "loss": 0.6788307428359985, "step": 4505 }, { "epoch": 1.58784140969163, "grad_norm": 1.8646329547804459, "learning_rate": 2.2385724668073104e-06, "loss": 0.5651443004608154, "step": 4506 }, { "epoch": 1.588193832599119, "grad_norm": 1.6858252262208455, "learning_rate": 2.2348999083270005e-06, "loss": 0.5308901071548462, "step": 4507 }, { "epoch": 1.5885462555066079, "grad_norm": 2.3264820621642084, "learning_rate": 2.2312299858565156e-06, "loss": 0.60570228099823, "step": 4508 }, { "epoch": 1.5888986784140968, "grad_norm": 1.8330509972931788, "learning_rate": 2.22756270064168e-06, "loss": 0.6544185876846313, "step": 4509 }, { "epoch": 1.5892511013215858, "grad_norm": 1.7565673285953047, "learning_rate": 2.2238980539274156e-06, "loss": 0.667883038520813, "step": 4510 }, { "epoch": 1.589603524229075, "grad_norm": 1.7707733782287267, "learning_rate": 2.2202360469577622e-06, "loss": 0.647671103477478, "step": 4511 }, { "epoch": 1.5899559471365639, "grad_norm": 1.8031539733499908, "learning_rate": 2.216576680975856e-06, "loss": 0.6990867257118225, "step": 4512 }, { "epoch": 1.590308370044053, "grad_norm": 1.6913080596921681, "learning_rate": 2.212919957223938e-06, "loss": 0.6292023658752441, "step": 4513 }, { "epoch": 1.590660792951542, "grad_norm": 2.0512598736304763, "learning_rate": 2.2092658769433504e-06, "loss": 0.638721227645874, "step": 4514 }, { "epoch": 1.5910132158590309, "grad_norm": 2.0710919586830365, "learning_rate": 2.2056144413745396e-06, "loss": 0.5622225403785706, "step": 4515 }, { "epoch": 1.5913656387665198, "grad_norm": 1.9225600729192178, "learning_rate": 2.2019656517570576e-06, "loss": 0.44093507528305054, "step": 4516 }, { "epoch": 1.5917180616740088, "grad_norm": 1.9689195876449703, "learning_rate": 2.198319509329556e-06, "loss": 0.6889619827270508, "step": 4517 }, { "epoch": 1.5920704845814977, "grad_norm": 1.8723694409082583, "learning_rate": 2.1946760153297773e-06, "loss": 0.5873552560806274, "step": 4518 }, { "epoch": 1.5924229074889866, "grad_norm": 2.3733819724747245, "learning_rate": 2.191035170994584e-06, "loss": 0.7172325849533081, "step": 4519 }, { "epoch": 1.5927753303964758, "grad_norm": 1.5631566998768178, "learning_rate": 2.187396977559927e-06, "loss": 0.520845890045166, "step": 4520 }, { "epoch": 1.5931277533039647, "grad_norm": 1.5657344992000655, "learning_rate": 2.1837614362608574e-06, "loss": 0.5241606831550598, "step": 4521 }, { "epoch": 1.5934801762114539, "grad_norm": 2.0290302307971433, "learning_rate": 2.1801285483315303e-06, "loss": 0.583808422088623, "step": 4522 }, { "epoch": 1.5938325991189428, "grad_norm": 1.829890026298915, "learning_rate": 2.1764983150051955e-06, "loss": 0.4648814797401428, "step": 4523 }, { "epoch": 1.5941850220264318, "grad_norm": 1.9603824667877958, "learning_rate": 2.1728707375142087e-06, "loss": 0.590090274810791, "step": 4524 }, { "epoch": 1.5945374449339207, "grad_norm": 2.0292397946897527, "learning_rate": 2.16924581709002e-06, "loss": 0.6554102897644043, "step": 4525 }, { "epoch": 1.5948898678414096, "grad_norm": 2.011864917811992, "learning_rate": 2.1656235549631677e-06, "loss": 0.5880511999130249, "step": 4526 }, { "epoch": 1.5952422907488986, "grad_norm": 1.6246832017365502, "learning_rate": 2.1620039523633074e-06, "loss": 0.5779908299446106, "step": 4527 }, { "epoch": 1.5955947136563877, "grad_norm": 1.9147900218294176, "learning_rate": 2.1583870105191775e-06, "loss": 0.5030412673950195, "step": 4528 }, { "epoch": 1.5959471365638767, "grad_norm": 1.9632795275127009, "learning_rate": 2.1547727306586173e-06, "loss": 0.5667461156845093, "step": 4529 }, { "epoch": 1.5962995594713656, "grad_norm": 2.3190730605108882, "learning_rate": 2.151161114008563e-06, "loss": 0.6820607781410217, "step": 4530 }, { "epoch": 1.5966519823788548, "grad_norm": 1.7640709477354637, "learning_rate": 2.1475521617950425e-06, "loss": 0.6165209412574768, "step": 4531 }, { "epoch": 1.5970044052863437, "grad_norm": 1.897918487033638, "learning_rate": 2.1439458752431887e-06, "loss": 0.5987168550491333, "step": 4532 }, { "epoch": 1.5973568281938326, "grad_norm": 1.8946893490374197, "learning_rate": 2.1403422555772226e-06, "loss": 0.5161086320877075, "step": 4533 }, { "epoch": 1.5977092511013216, "grad_norm": 1.817150642667859, "learning_rate": 2.1367413040204543e-06, "loss": 0.5216903686523438, "step": 4534 }, { "epoch": 1.5980616740088105, "grad_norm": 1.7820775067820096, "learning_rate": 2.133143021795302e-06, "loss": 0.5664666891098022, "step": 4535 }, { "epoch": 1.5984140969162994, "grad_norm": 1.8205676682468495, "learning_rate": 2.129547410123268e-06, "loss": 0.501051127910614, "step": 4536 }, { "epoch": 1.5987665198237886, "grad_norm": 1.5799563385798543, "learning_rate": 2.1259544702249515e-06, "loss": 0.5466792583465576, "step": 4537 }, { "epoch": 1.5991189427312775, "grad_norm": 1.9007615560911546, "learning_rate": 2.122364203320043e-06, "loss": 0.5295613408088684, "step": 4538 }, { "epoch": 1.5994713656387667, "grad_norm": 1.6670646942482272, "learning_rate": 2.1187766106273224e-06, "loss": 0.5406922101974487, "step": 4539 }, { "epoch": 1.5998237885462556, "grad_norm": 2.0700620230157125, "learning_rate": 2.1151916933646764e-06, "loss": 0.5908178687095642, "step": 4540 }, { "epoch": 1.6001762114537446, "grad_norm": 1.8405525752725544, "learning_rate": 2.1116094527490594e-06, "loss": 0.6207743883132935, "step": 4541 }, { "epoch": 1.6005286343612335, "grad_norm": 2.7642600887250652, "learning_rate": 2.1080298899965413e-06, "loss": 0.5655614137649536, "step": 4542 }, { "epoch": 1.6008810572687224, "grad_norm": 1.5764846584358823, "learning_rate": 2.104453006322268e-06, "loss": 0.6019319295883179, "step": 4543 }, { "epoch": 1.6012334801762114, "grad_norm": 1.8499785252270624, "learning_rate": 2.1008788029404794e-06, "loss": 0.6109766364097595, "step": 4544 }, { "epoch": 1.6015859030837003, "grad_norm": 1.8285934792669327, "learning_rate": 2.0973072810645078e-06, "loss": 0.5309078693389893, "step": 4545 }, { "epoch": 1.6019383259911895, "grad_norm": 1.7116030885611606, "learning_rate": 2.093738441906774e-06, "loss": 0.5440298318862915, "step": 4546 }, { "epoch": 1.6022907488986784, "grad_norm": 1.6012955775631803, "learning_rate": 2.0901722866787842e-06, "loss": 0.46502384543418884, "step": 4547 }, { "epoch": 1.6026431718061676, "grad_norm": 1.7999501734847188, "learning_rate": 2.086608816591146e-06, "loss": 0.4822906255722046, "step": 4548 }, { "epoch": 1.6029955947136565, "grad_norm": 1.8169323717501906, "learning_rate": 2.083048032853534e-06, "loss": 0.6382625699043274, "step": 4549 }, { "epoch": 1.6033480176211454, "grad_norm": 1.7542851479568786, "learning_rate": 2.0794899366747334e-06, "loss": 0.6070914268493652, "step": 4550 }, { "epoch": 1.6037004405286344, "grad_norm": 1.8496689505105712, "learning_rate": 2.0759345292626045e-06, "loss": 0.5953283309936523, "step": 4551 }, { "epoch": 1.6040528634361233, "grad_norm": 1.6448363622587787, "learning_rate": 2.0723818118240958e-06, "loss": 0.47553640604019165, "step": 4552 }, { "epoch": 1.6044052863436122, "grad_norm": 2.007835441279153, "learning_rate": 2.0688317855652463e-06, "loss": 0.7020712494850159, "step": 4553 }, { "epoch": 1.6047577092511012, "grad_norm": 1.739770344308816, "learning_rate": 2.0652844516911776e-06, "loss": 0.5998836159706116, "step": 4554 }, { "epoch": 1.6051101321585903, "grad_norm": 1.7690620328907303, "learning_rate": 2.0617398114060983e-06, "loss": 0.6501786708831787, "step": 4555 }, { "epoch": 1.6054625550660793, "grad_norm": 1.7628232586759778, "learning_rate": 2.0581978659133097e-06, "loss": 0.6444278955459595, "step": 4556 }, { "epoch": 1.6058149779735684, "grad_norm": 1.8812364367093761, "learning_rate": 2.0546586164151827e-06, "loss": 0.6756579875946045, "step": 4557 }, { "epoch": 1.6061674008810574, "grad_norm": 1.9541887465796286, "learning_rate": 2.051122064113189e-06, "loss": 0.6043737530708313, "step": 4558 }, { "epoch": 1.6065198237885463, "grad_norm": 1.7992795463772795, "learning_rate": 2.047588210207877e-06, "loss": 0.6504104137420654, "step": 4559 }, { "epoch": 1.6068722466960352, "grad_norm": 1.8447157864854533, "learning_rate": 2.044057055898879e-06, "loss": 0.6586685180664062, "step": 4560 }, { "epoch": 1.6072246696035242, "grad_norm": 1.6895598009184531, "learning_rate": 2.0405286023849125e-06, "loss": 0.4463368058204651, "step": 4561 }, { "epoch": 1.607577092511013, "grad_norm": 1.626067629091748, "learning_rate": 2.037002850863777e-06, "loss": 0.5208157896995544, "step": 4562 }, { "epoch": 1.607929515418502, "grad_norm": 2.325947552099387, "learning_rate": 2.033479802532354e-06, "loss": 0.612602174282074, "step": 4563 }, { "epoch": 1.6082819383259912, "grad_norm": 1.8677335810734068, "learning_rate": 2.0299594585866166e-06, "loss": 0.6871482133865356, "step": 4564 }, { "epoch": 1.6086343612334801, "grad_norm": 2.1450630320575863, "learning_rate": 2.0264418202215998e-06, "loss": 0.5770177245140076, "step": 4565 }, { "epoch": 1.6089867841409693, "grad_norm": 2.0018570918486263, "learning_rate": 2.0229268886314413e-06, "loss": 0.600841224193573, "step": 4566 }, { "epoch": 1.6093392070484582, "grad_norm": 1.4951834973656204, "learning_rate": 2.0194146650093494e-06, "loss": 0.47742071747779846, "step": 4567 }, { "epoch": 1.6096916299559472, "grad_norm": 1.932667797658379, "learning_rate": 2.015905150547612e-06, "loss": 0.5528711080551147, "step": 4568 }, { "epoch": 1.610044052863436, "grad_norm": 1.7893968437532208, "learning_rate": 2.0123983464376028e-06, "loss": 0.6892603635787964, "step": 4569 }, { "epoch": 1.610396475770925, "grad_norm": 2.0432539431091405, "learning_rate": 2.0088942538697687e-06, "loss": 0.593653678894043, "step": 4570 }, { "epoch": 1.610748898678414, "grad_norm": 1.913622035178548, "learning_rate": 2.005392874033646e-06, "loss": 0.5570813417434692, "step": 4571 }, { "epoch": 1.6111013215859031, "grad_norm": 1.7912413841249368, "learning_rate": 2.0018942081178426e-06, "loss": 0.6723357439041138, "step": 4572 }, { "epoch": 1.611453744493392, "grad_norm": 1.8833118579628767, "learning_rate": 1.9983982573100413e-06, "loss": 0.5333940982818604, "step": 4573 }, { "epoch": 1.611806167400881, "grad_norm": 2.1881508790927358, "learning_rate": 1.9949050227970148e-06, "loss": 0.6404193043708801, "step": 4574 }, { "epoch": 1.6121585903083702, "grad_norm": 1.9103565569987608, "learning_rate": 1.991414505764605e-06, "loss": 0.6831241250038147, "step": 4575 }, { "epoch": 1.612511013215859, "grad_norm": 2.3229832844307063, "learning_rate": 1.9879267073977337e-06, "loss": 0.6741847991943359, "step": 4576 }, { "epoch": 1.612863436123348, "grad_norm": 1.843434925588856, "learning_rate": 1.9844416288804004e-06, "loss": 0.5234787464141846, "step": 4577 }, { "epoch": 1.613215859030837, "grad_norm": 1.931234115746558, "learning_rate": 1.9809592713956782e-06, "loss": 0.6462803483009338, "step": 4578 }, { "epoch": 1.613568281938326, "grad_norm": 2.495392945939654, "learning_rate": 1.977479636125724e-06, "loss": 0.612025797367096, "step": 4579 }, { "epoch": 1.6139207048458148, "grad_norm": 1.6414504893846202, "learning_rate": 1.9740027242517668e-06, "loss": 0.5065322518348694, "step": 4580 }, { "epoch": 1.614273127753304, "grad_norm": 1.9613495904560583, "learning_rate": 1.9705285369540994e-06, "loss": 0.4986911714076996, "step": 4581 }, { "epoch": 1.614625550660793, "grad_norm": 2.1185650604413926, "learning_rate": 1.967057075412111e-06, "loss": 0.6030969619750977, "step": 4582 }, { "epoch": 1.614977973568282, "grad_norm": 1.8032946015429019, "learning_rate": 1.963588340804251e-06, "loss": 0.6116718649864197, "step": 4583 }, { "epoch": 1.615330396475771, "grad_norm": 1.9008591407855147, "learning_rate": 1.960122334308047e-06, "loss": 0.8064850568771362, "step": 4584 }, { "epoch": 1.61568281938326, "grad_norm": 2.130250646945173, "learning_rate": 1.9566590571000997e-06, "loss": 0.7416974306106567, "step": 4585 }, { "epoch": 1.616035242290749, "grad_norm": 2.0285944926888604, "learning_rate": 1.9531985103560813e-06, "loss": 0.48169833421707153, "step": 4586 }, { "epoch": 1.6163876651982378, "grad_norm": 8.08226040018375, "learning_rate": 1.949740695250746e-06, "loss": 0.7766422629356384, "step": 4587 }, { "epoch": 1.6167400881057268, "grad_norm": 1.6227557131714891, "learning_rate": 1.9462856129579144e-06, "loss": 0.3793888986110687, "step": 4588 }, { "epoch": 1.6170925110132157, "grad_norm": 1.6662726387585254, "learning_rate": 1.94283326465047e-06, "loss": 0.6129955053329468, "step": 4589 }, { "epoch": 1.6174449339207049, "grad_norm": 1.927411767174183, "learning_rate": 1.9393836515003874e-06, "loss": 0.7420347929000854, "step": 4590 }, { "epoch": 1.6177973568281938, "grad_norm": 1.810002162071199, "learning_rate": 1.9359367746786993e-06, "loss": 0.49013108015060425, "step": 4591 }, { "epoch": 1.618149779735683, "grad_norm": 1.8150752517575908, "learning_rate": 1.932492635355513e-06, "loss": 0.5198413133621216, "step": 4592 }, { "epoch": 1.618502202643172, "grad_norm": 1.9402976415289777, "learning_rate": 1.929051234700007e-06, "loss": 0.6031092405319214, "step": 4593 }, { "epoch": 1.6188546255506608, "grad_norm": 2.041490312444486, "learning_rate": 1.9256125738804264e-06, "loss": 0.6269406080245972, "step": 4594 }, { "epoch": 1.6192070484581498, "grad_norm": 1.801972947869227, "learning_rate": 1.922176654064096e-06, "loss": 0.4518774747848511, "step": 4595 }, { "epoch": 1.6195594713656387, "grad_norm": 1.8680481961289441, "learning_rate": 1.9187434764174027e-06, "loss": 0.6199424862861633, "step": 4596 }, { "epoch": 1.6199118942731277, "grad_norm": 2.634014207343412, "learning_rate": 1.9153130421057955e-06, "loss": 0.5155355930328369, "step": 4597 }, { "epoch": 1.6202643171806166, "grad_norm": 1.8081505074484028, "learning_rate": 1.9118853522938087e-06, "loss": 0.6188424229621887, "step": 4598 }, { "epoch": 1.6206167400881057, "grad_norm": 1.8999856535081827, "learning_rate": 1.908460408145033e-06, "loss": 0.5807337164878845, "step": 4599 }, { "epoch": 1.6209691629955947, "grad_norm": 1.6142171687185456, "learning_rate": 1.9050382108221311e-06, "loss": 0.5258378982543945, "step": 4600 }, { "epoch": 1.6213215859030838, "grad_norm": 1.9194714558474444, "learning_rate": 1.9016187614868308e-06, "loss": 0.6612311601638794, "step": 4601 }, { "epoch": 1.6216740088105728, "grad_norm": 1.7849999472385678, "learning_rate": 1.8982020612999285e-06, "loss": 0.611383855342865, "step": 4602 }, { "epoch": 1.6220264317180617, "grad_norm": 1.7599275323638883, "learning_rate": 1.894788111421294e-06, "loss": 0.6111105680465698, "step": 4603 }, { "epoch": 1.6223788546255506, "grad_norm": 2.061255928544227, "learning_rate": 1.8913769130098504e-06, "loss": 0.7554557919502258, "step": 4604 }, { "epoch": 1.6227312775330396, "grad_norm": 1.7818402726516558, "learning_rate": 1.887968467223591e-06, "loss": 0.597324013710022, "step": 4605 }, { "epoch": 1.6230837004405285, "grad_norm": 2.3192399293978014, "learning_rate": 1.8845627752195839e-06, "loss": 0.6232750415802002, "step": 4606 }, { "epoch": 1.6234361233480175, "grad_norm": 1.7697166073683794, "learning_rate": 1.8811598381539543e-06, "loss": 0.45699936151504517, "step": 4607 }, { "epoch": 1.6237885462555066, "grad_norm": 1.9980768091261172, "learning_rate": 1.87775965718189e-06, "loss": 0.5307953953742981, "step": 4608 }, { "epoch": 1.6241409691629956, "grad_norm": 1.8817640717556428, "learning_rate": 1.8743622334576495e-06, "loss": 0.6013764142990112, "step": 4609 }, { "epoch": 1.6244933920704847, "grad_norm": 2.0614740198183066, "learning_rate": 1.8709675681345485e-06, "loss": 0.5143340826034546, "step": 4610 }, { "epoch": 1.6248458149779736, "grad_norm": 1.6895900050976231, "learning_rate": 1.8675756623649788e-06, "loss": 0.506861686706543, "step": 4611 }, { "epoch": 1.6251982378854626, "grad_norm": 2.223885866703504, "learning_rate": 1.8641865173003793e-06, "loss": 0.6807849407196045, "step": 4612 }, { "epoch": 1.6255506607929515, "grad_norm": 1.8930990565263293, "learning_rate": 1.8608001340912573e-06, "loss": 0.592629075050354, "step": 4613 }, { "epoch": 1.6259030837004405, "grad_norm": 2.032831166123834, "learning_rate": 1.8574165138871925e-06, "loss": 0.5669249296188354, "step": 4614 }, { "epoch": 1.6262555066079294, "grad_norm": 1.9071887451281335, "learning_rate": 1.8540356578368135e-06, "loss": 0.7123057246208191, "step": 4615 }, { "epoch": 1.6266079295154185, "grad_norm": 1.7499585996323015, "learning_rate": 1.8506575670878168e-06, "loss": 0.5844429731369019, "step": 4616 }, { "epoch": 1.6269603524229075, "grad_norm": 1.8176797951508414, "learning_rate": 1.8472822427869597e-06, "loss": 0.661457359790802, "step": 4617 }, { "epoch": 1.6273127753303964, "grad_norm": 1.9714232511915755, "learning_rate": 1.8439096860800565e-06, "loss": 0.6944575905799866, "step": 4618 }, { "epoch": 1.6276651982378856, "grad_norm": 1.9471855664955058, "learning_rate": 1.8405398981119927e-06, "loss": 0.5818712115287781, "step": 4619 }, { "epoch": 1.6280176211453745, "grad_norm": 1.8573981084806426, "learning_rate": 1.8371728800266964e-06, "loss": 0.6373921632766724, "step": 4620 }, { "epoch": 1.6283700440528635, "grad_norm": 1.8455409169726698, "learning_rate": 1.8338086329671734e-06, "loss": 0.4629862904548645, "step": 4621 }, { "epoch": 1.6287224669603524, "grad_norm": 2.1547215929268306, "learning_rate": 1.8304471580754779e-06, "loss": 0.6537790894508362, "step": 4622 }, { "epoch": 1.6290748898678413, "grad_norm": 1.9071168587624383, "learning_rate": 1.8270884564927272e-06, "loss": 0.527474582195282, "step": 4623 }, { "epoch": 1.6294273127753303, "grad_norm": 1.9134019886674338, "learning_rate": 1.8237325293590934e-06, "loss": 0.48941463232040405, "step": 4624 }, { "epoch": 1.6297797356828194, "grad_norm": 1.7797372995747724, "learning_rate": 1.8203793778138123e-06, "loss": 0.6276243925094604, "step": 4625 }, { "epoch": 1.6301321585903084, "grad_norm": 2.175835170708709, "learning_rate": 1.8170290029951708e-06, "loss": 0.6339844465255737, "step": 4626 }, { "epoch": 1.6304845814977975, "grad_norm": 1.8667689453086813, "learning_rate": 1.813681406040524e-06, "loss": 0.517188549041748, "step": 4627 }, { "epoch": 1.6308370044052865, "grad_norm": 1.8956914399941025, "learning_rate": 1.8103365880862667e-06, "loss": 0.576552152633667, "step": 4628 }, { "epoch": 1.6311894273127754, "grad_norm": 1.7936413452903872, "learning_rate": 1.8069945502678688e-06, "loss": 0.5703557729721069, "step": 4629 }, { "epoch": 1.6315418502202643, "grad_norm": 1.9048409586347532, "learning_rate": 1.8036552937198447e-06, "loss": 0.538072943687439, "step": 4630 }, { "epoch": 1.6318942731277533, "grad_norm": 1.6721149802212347, "learning_rate": 1.8003188195757693e-06, "loss": 0.4144761562347412, "step": 4631 }, { "epoch": 1.6322466960352422, "grad_norm": 2.056410628146389, "learning_rate": 1.7969851289682704e-06, "loss": 0.5357951521873474, "step": 4632 }, { "epoch": 1.6325991189427311, "grad_norm": 1.9601913826257962, "learning_rate": 1.7936542230290333e-06, "loss": 0.6158766746520996, "step": 4633 }, { "epoch": 1.6329515418502203, "grad_norm": 2.018782202231636, "learning_rate": 1.790326102888794e-06, "loss": 0.7278525233268738, "step": 4634 }, { "epoch": 1.6333039647577092, "grad_norm": 1.8937378067838377, "learning_rate": 1.787000769677354e-06, "loss": 0.5113881230354309, "step": 4635 }, { "epoch": 1.6336563876651984, "grad_norm": 2.2218997592930987, "learning_rate": 1.7836782245235485e-06, "loss": 0.6247432827949524, "step": 4636 }, { "epoch": 1.6340088105726873, "grad_norm": 1.9409043558834718, "learning_rate": 1.7803584685552877e-06, "loss": 0.513325572013855, "step": 4637 }, { "epoch": 1.6343612334801763, "grad_norm": 2.023194297584799, "learning_rate": 1.7770415028995213e-06, "loss": 0.4980276823043823, "step": 4638 }, { "epoch": 1.6347136563876652, "grad_norm": 1.8669544509684106, "learning_rate": 1.7737273286822565e-06, "loss": 0.5832515954971313, "step": 4639 }, { "epoch": 1.6350660792951541, "grad_norm": 1.7519671458346908, "learning_rate": 1.7704159470285532e-06, "loss": 0.6030116081237793, "step": 4640 }, { "epoch": 1.635418502202643, "grad_norm": 2.26980120712081, "learning_rate": 1.7671073590625188e-06, "loss": 0.5494866371154785, "step": 4641 }, { "epoch": 1.635770925110132, "grad_norm": 1.8803060042220399, "learning_rate": 1.7638015659073216e-06, "loss": 0.617791473865509, "step": 4642 }, { "epoch": 1.6361233480176212, "grad_norm": 1.8809591920257003, "learning_rate": 1.760498568685175e-06, "loss": 0.5213589668273926, "step": 4643 }, { "epoch": 1.63647577092511, "grad_norm": 1.7835752431606857, "learning_rate": 1.7571983685173367e-06, "loss": 0.5114192962646484, "step": 4644 }, { "epoch": 1.6368281938325993, "grad_norm": 1.8264916856765907, "learning_rate": 1.7539009665241291e-06, "loss": 0.6207156181335449, "step": 4645 }, { "epoch": 1.6371806167400882, "grad_norm": 1.7037955383522276, "learning_rate": 1.750606363824915e-06, "loss": 0.5893350839614868, "step": 4646 }, { "epoch": 1.6375330396475771, "grad_norm": 2.0239756750398077, "learning_rate": 1.7473145615381092e-06, "loss": 0.6453898549079895, "step": 4647 }, { "epoch": 1.637885462555066, "grad_norm": 1.623565893456343, "learning_rate": 1.7440255607811773e-06, "loss": 0.5098680853843689, "step": 4648 }, { "epoch": 1.638237885462555, "grad_norm": 1.9009179186379688, "learning_rate": 1.7407393626706305e-06, "loss": 0.5841408967971802, "step": 4649 }, { "epoch": 1.638590308370044, "grad_norm": 1.8903189372223002, "learning_rate": 1.7374559683220337e-06, "loss": 0.5593127012252808, "step": 4650 }, { "epoch": 1.638942731277533, "grad_norm": 1.9192509501465884, "learning_rate": 1.7341753788499983e-06, "loss": 0.6885190606117249, "step": 4651 }, { "epoch": 1.639295154185022, "grad_norm": 2.019948918382337, "learning_rate": 1.730897595368175e-06, "loss": 0.6271092891693115, "step": 4652 }, { "epoch": 1.639647577092511, "grad_norm": 1.8193728432309102, "learning_rate": 1.7276226189892763e-06, "loss": 0.6035536527633667, "step": 4653 }, { "epoch": 1.6400000000000001, "grad_norm": 1.876741558260643, "learning_rate": 1.724350450825052e-06, "loss": 0.49980080127716064, "step": 4654 }, { "epoch": 1.640352422907489, "grad_norm": 1.945483701689467, "learning_rate": 1.721081091986303e-06, "loss": 0.6056489944458008, "step": 4655 }, { "epoch": 1.640704845814978, "grad_norm": 1.998934183218588, "learning_rate": 1.717814543582873e-06, "loss": 0.5611459016799927, "step": 4656 }, { "epoch": 1.641057268722467, "grad_norm": 1.8501618159787931, "learning_rate": 1.7145508067236515e-06, "loss": 0.5655262470245361, "step": 4657 }, { "epoch": 1.6414096916299559, "grad_norm": 2.17470073262635, "learning_rate": 1.7112898825165814e-06, "loss": 0.7793601751327515, "step": 4658 }, { "epoch": 1.6417621145374448, "grad_norm": 1.8078904709838137, "learning_rate": 1.7080317720686434e-06, "loss": 0.6587018370628357, "step": 4659 }, { "epoch": 1.642114537444934, "grad_norm": 2.0052578395520313, "learning_rate": 1.7047764764858598e-06, "loss": 0.5546305775642395, "step": 4660 }, { "epoch": 1.642466960352423, "grad_norm": 2.2168924782846844, "learning_rate": 1.7015239968733066e-06, "loss": 0.6215736865997314, "step": 4661 }, { "epoch": 1.642819383259912, "grad_norm": 1.95167913439103, "learning_rate": 1.6982743343350983e-06, "loss": 0.5772532224655151, "step": 4662 }, { "epoch": 1.643171806167401, "grad_norm": 1.9049742666250684, "learning_rate": 1.6950274899743947e-06, "loss": 0.567034900188446, "step": 4663 }, { "epoch": 1.64352422907489, "grad_norm": 1.6486603082479945, "learning_rate": 1.6917834648933985e-06, "loss": 0.5306716561317444, "step": 4664 }, { "epoch": 1.6438766519823789, "grad_norm": 1.923372734442966, "learning_rate": 1.688542260193351e-06, "loss": 0.6691634654998779, "step": 4665 }, { "epoch": 1.6442290748898678, "grad_norm": 1.9073972200097022, "learning_rate": 1.6853038769745466e-06, "loss": 0.6071977615356445, "step": 4666 }, { "epoch": 1.6445814977973567, "grad_norm": 3.0113580201176355, "learning_rate": 1.6820683163363161e-06, "loss": 0.743544340133667, "step": 4667 }, { "epoch": 1.6449339207048457, "grad_norm": 2.2198521832647864, "learning_rate": 1.6788355793770238e-06, "loss": 0.5745127201080322, "step": 4668 }, { "epoch": 1.6452863436123348, "grad_norm": 2.083730313741091, "learning_rate": 1.6756056671940902e-06, "loss": 0.5153336524963379, "step": 4669 }, { "epoch": 1.6456387665198238, "grad_norm": 1.888215895134721, "learning_rate": 1.6723785808839666e-06, "loss": 0.5780388116836548, "step": 4670 }, { "epoch": 1.645991189427313, "grad_norm": 1.975333041709577, "learning_rate": 1.6691543215421513e-06, "loss": 0.601921796798706, "step": 4671 }, { "epoch": 1.6463436123348019, "grad_norm": 1.8402715148458082, "learning_rate": 1.6659328902631766e-06, "loss": 0.6636123657226562, "step": 4672 }, { "epoch": 1.6466960352422908, "grad_norm": 1.804292320266694, "learning_rate": 1.6627142881406188e-06, "loss": 0.45225393772125244, "step": 4673 }, { "epoch": 1.6470484581497797, "grad_norm": 1.862693343451114, "learning_rate": 1.6594985162670984e-06, "loss": 0.6406756043434143, "step": 4674 }, { "epoch": 1.6474008810572687, "grad_norm": 2.11645792406816, "learning_rate": 1.6562855757342632e-06, "loss": 0.6735906600952148, "step": 4675 }, { "epoch": 1.6477533039647576, "grad_norm": 1.9503356292211693, "learning_rate": 1.6530754676328064e-06, "loss": 0.515188992023468, "step": 4676 }, { "epoch": 1.6481057268722465, "grad_norm": 1.721977079638204, "learning_rate": 1.6498681930524652e-06, "loss": 0.5976129174232483, "step": 4677 }, { "epoch": 1.6484581497797357, "grad_norm": 1.9285425022468947, "learning_rate": 1.6466637530820074e-06, "loss": 0.7367427945137024, "step": 4678 }, { "epoch": 1.6488105726872246, "grad_norm": 2.073959448612198, "learning_rate": 1.6434621488092385e-06, "loss": 0.5173717737197876, "step": 4679 }, { "epoch": 1.6491629955947138, "grad_norm": 2.1289983497571745, "learning_rate": 1.6402633813210056e-06, "loss": 0.7961066961288452, "step": 4680 }, { "epoch": 1.6495154185022027, "grad_norm": 2.1150632325299488, "learning_rate": 1.637067451703187e-06, "loss": 0.8271595239639282, "step": 4681 }, { "epoch": 1.6498678414096917, "grad_norm": 1.9513356704584446, "learning_rate": 1.6338743610407103e-06, "loss": 0.6818888783454895, "step": 4682 }, { "epoch": 1.6502202643171806, "grad_norm": 2.182931567425792, "learning_rate": 1.6306841104175219e-06, "loss": 0.5168178677558899, "step": 4683 }, { "epoch": 1.6505726872246695, "grad_norm": 1.8122401400933128, "learning_rate": 1.627496700916612e-06, "loss": 0.5792043209075928, "step": 4684 }, { "epoch": 1.6509251101321585, "grad_norm": 1.8346977982265331, "learning_rate": 1.624312133620013e-06, "loss": 0.6099069118499756, "step": 4685 }, { "epoch": 1.6512775330396474, "grad_norm": 1.7489569966562013, "learning_rate": 1.6211304096087832e-06, "loss": 0.4562867283821106, "step": 4686 }, { "epoch": 1.6516299559471366, "grad_norm": 1.767208393167573, "learning_rate": 1.61795152996302e-06, "loss": 0.48648780584335327, "step": 4687 }, { "epoch": 1.6519823788546255, "grad_norm": 1.619888597224146, "learning_rate": 1.6147754957618533e-06, "loss": 0.5351820588111877, "step": 4688 }, { "epoch": 1.6523348017621147, "grad_norm": 1.5845894367063569, "learning_rate": 1.6116023080834442e-06, "loss": 0.4646923542022705, "step": 4689 }, { "epoch": 1.6526872246696036, "grad_norm": 2.0533783205545304, "learning_rate": 1.608431968005001e-06, "loss": 0.6257984638214111, "step": 4690 }, { "epoch": 1.6530396475770925, "grad_norm": 1.6714052981831828, "learning_rate": 1.605264476602747e-06, "loss": 0.5646224021911621, "step": 4691 }, { "epoch": 1.6533920704845815, "grad_norm": 1.9704920715227376, "learning_rate": 1.6020998349519457e-06, "loss": 0.6074661612510681, "step": 4692 }, { "epoch": 1.6537444933920704, "grad_norm": 2.119532209280586, "learning_rate": 1.598938044126901e-06, "loss": 0.703096866607666, "step": 4693 }, { "epoch": 1.6540969162995593, "grad_norm": 2.0281924961410436, "learning_rate": 1.5957791052009397e-06, "loss": 0.6677542924880981, "step": 4694 }, { "epoch": 1.6544493392070485, "grad_norm": 2.2554606939897313, "learning_rate": 1.5926230192464232e-06, "loss": 0.755639910697937, "step": 4695 }, { "epoch": 1.6548017621145374, "grad_norm": 2.0543326225263705, "learning_rate": 1.5894697873347442e-06, "loss": 0.7008202075958252, "step": 4696 }, { "epoch": 1.6551541850220264, "grad_norm": 1.7892378339322623, "learning_rate": 1.5863194105363244e-06, "loss": 0.5049681067466736, "step": 4697 }, { "epoch": 1.6555066079295155, "grad_norm": 1.8394208195845667, "learning_rate": 1.583171889920626e-06, "loss": 0.7415407299995422, "step": 4698 }, { "epoch": 1.6558590308370045, "grad_norm": 2.0103355889821404, "learning_rate": 1.5800272265561256e-06, "loss": 0.7949470281600952, "step": 4699 }, { "epoch": 1.6562114537444934, "grad_norm": 2.2401604191268456, "learning_rate": 1.5768854215103435e-06, "loss": 0.5892510414123535, "step": 4700 }, { "epoch": 1.6565638766519823, "grad_norm": 2.1732638193025076, "learning_rate": 1.5737464758498243e-06, "loss": 0.5357394814491272, "step": 4701 }, { "epoch": 1.6569162995594713, "grad_norm": 1.92797804038562, "learning_rate": 1.5706103906401416e-06, "loss": 0.6078016757965088, "step": 4702 }, { "epoch": 1.6572687224669602, "grad_norm": 2.019695184899454, "learning_rate": 1.5674771669459e-06, "loss": 0.5858899354934692, "step": 4703 }, { "epoch": 1.6576211453744494, "grad_norm": 1.819706102358174, "learning_rate": 1.5643468058307287e-06, "loss": 0.5783329010009766, "step": 4704 }, { "epoch": 1.6579735682819383, "grad_norm": 1.8104985438999985, "learning_rate": 1.561219308357288e-06, "loss": 0.5412800312042236, "step": 4705 }, { "epoch": 1.6583259911894275, "grad_norm": 1.5193820753894371, "learning_rate": 1.5580946755872727e-06, "loss": 0.5609365701675415, "step": 4706 }, { "epoch": 1.6586784140969164, "grad_norm": 2.2157168701611427, "learning_rate": 1.554972908581388e-06, "loss": 0.45193177461624146, "step": 4707 }, { "epoch": 1.6590308370044053, "grad_norm": 1.885008861796424, "learning_rate": 1.5518540083993838e-06, "loss": 0.6402257680892944, "step": 4708 }, { "epoch": 1.6593832599118943, "grad_norm": 1.906792902482494, "learning_rate": 1.5487379761000276e-06, "loss": 0.5956071615219116, "step": 4709 }, { "epoch": 1.6597356828193832, "grad_norm": 1.5229764109639101, "learning_rate": 1.5456248127411156e-06, "loss": 0.5975273847579956, "step": 4710 }, { "epoch": 1.6600881057268722, "grad_norm": 2.2860844716103514, "learning_rate": 1.54251451937947e-06, "loss": 0.6914929151535034, "step": 4711 }, { "epoch": 1.660440528634361, "grad_norm": 2.141875122923791, "learning_rate": 1.5394070970709384e-06, "loss": 0.5867592096328735, "step": 4712 }, { "epoch": 1.6607929515418502, "grad_norm": 1.96612759503979, "learning_rate": 1.5363025468703917e-06, "loss": 0.6448687314987183, "step": 4713 }, { "epoch": 1.6611453744493392, "grad_norm": 1.8452223088884994, "learning_rate": 1.5332008698317348e-06, "loss": 0.5870485305786133, "step": 4714 }, { "epoch": 1.6614977973568283, "grad_norm": 1.9043935409080608, "learning_rate": 1.5301020670078803e-06, "loss": 0.6336855888366699, "step": 4715 }, { "epoch": 1.6618502202643173, "grad_norm": 1.9247468731228787, "learning_rate": 1.527006139450784e-06, "loss": 0.5924787521362305, "step": 4716 }, { "epoch": 1.6622026431718062, "grad_norm": 1.5860620334804822, "learning_rate": 1.523913088211415e-06, "loss": 0.5817830562591553, "step": 4717 }, { "epoch": 1.6625550660792952, "grad_norm": 1.8285246452015176, "learning_rate": 1.5208229143397657e-06, "loss": 0.5836390852928162, "step": 4718 }, { "epoch": 1.662907488986784, "grad_norm": 1.5094327417455158, "learning_rate": 1.5177356188848558e-06, "loss": 0.47110515832901, "step": 4719 }, { "epoch": 1.663259911894273, "grad_norm": 2.018838906344594, "learning_rate": 1.5146512028947225e-06, "loss": 0.6376635432243347, "step": 4720 }, { "epoch": 1.663612334801762, "grad_norm": 1.7847798861513196, "learning_rate": 1.5115696674164349e-06, "loss": 0.6399784088134766, "step": 4721 }, { "epoch": 1.6639647577092511, "grad_norm": 2.2125247577405127, "learning_rate": 1.5084910134960773e-06, "loss": 0.5891954898834229, "step": 4722 }, { "epoch": 1.66431718061674, "grad_norm": 1.5827717360956535, "learning_rate": 1.5054152421787505e-06, "loss": 0.6358054876327515, "step": 4723 }, { "epoch": 1.6646696035242292, "grad_norm": 1.855029533228232, "learning_rate": 1.5023423545085892e-06, "loss": 0.5072367191314697, "step": 4724 }, { "epoch": 1.6650220264317181, "grad_norm": 1.8866512172900913, "learning_rate": 1.4992723515287423e-06, "loss": 0.5549881458282471, "step": 4725 }, { "epoch": 1.665374449339207, "grad_norm": 1.5386308243299962, "learning_rate": 1.4962052342813804e-06, "loss": 0.4833364188671112, "step": 4726 }, { "epoch": 1.665726872246696, "grad_norm": 1.837283227568624, "learning_rate": 1.4931410038076922e-06, "loss": 0.6183017492294312, "step": 4727 }, { "epoch": 1.666079295154185, "grad_norm": 1.8220970545699078, "learning_rate": 1.4900796611478885e-06, "loss": 0.4956335127353668, "step": 4728 }, { "epoch": 1.666431718061674, "grad_norm": 2.0221134241832552, "learning_rate": 1.4870212073412027e-06, "loss": 0.7345337271690369, "step": 4729 }, { "epoch": 1.6667841409691628, "grad_norm": 1.5143426871240313, "learning_rate": 1.4839656434258864e-06, "loss": 0.5324833393096924, "step": 4730 }, { "epoch": 1.667136563876652, "grad_norm": 1.7373474898452002, "learning_rate": 1.4809129704392e-06, "loss": 0.5702322125434875, "step": 4731 }, { "epoch": 1.667488986784141, "grad_norm": 1.7374551868496027, "learning_rate": 1.4778631894174389e-06, "loss": 0.46188884973526, "step": 4732 }, { "epoch": 1.66784140969163, "grad_norm": 1.76911142349316, "learning_rate": 1.474816301395906e-06, "loss": 0.5505924224853516, "step": 4733 }, { "epoch": 1.668193832599119, "grad_norm": 1.9422422566247162, "learning_rate": 1.4717723074089251e-06, "loss": 0.5889710187911987, "step": 4734 }, { "epoch": 1.668546255506608, "grad_norm": 2.1059796200005825, "learning_rate": 1.4687312084898387e-06, "loss": 0.5794551372528076, "step": 4735 }, { "epoch": 1.668898678414097, "grad_norm": 1.8650983467603144, "learning_rate": 1.4656930056710006e-06, "loss": 0.567146897315979, "step": 4736 }, { "epoch": 1.6692511013215858, "grad_norm": 2.0850787713289067, "learning_rate": 1.4626576999837938e-06, "loss": 0.5330451130867004, "step": 4737 }, { "epoch": 1.6696035242290748, "grad_norm": 1.868870689701364, "learning_rate": 1.459625292458604e-06, "loss": 0.5631227493286133, "step": 4738 }, { "epoch": 1.669955947136564, "grad_norm": 1.8773546830623118, "learning_rate": 1.456595784124839e-06, "loss": 0.5145374536514282, "step": 4739 }, { "epoch": 1.6703083700440529, "grad_norm": 1.925388120075487, "learning_rate": 1.453569176010927e-06, "loss": 0.59378582239151, "step": 4740 }, { "epoch": 1.6706607929515418, "grad_norm": 2.053494266916917, "learning_rate": 1.4505454691443043e-06, "loss": 0.5827980041503906, "step": 4741 }, { "epoch": 1.671013215859031, "grad_norm": 1.6369511357690396, "learning_rate": 1.4475246645514274e-06, "loss": 0.5270858407020569, "step": 4742 }, { "epoch": 1.67136563876652, "grad_norm": 2.2160806515473186, "learning_rate": 1.4445067632577625e-06, "loss": 0.4708535373210907, "step": 4743 }, { "epoch": 1.6717180616740088, "grad_norm": 1.6498079624073576, "learning_rate": 1.4414917662877924e-06, "loss": 0.5505239963531494, "step": 4744 }, { "epoch": 1.6720704845814978, "grad_norm": 1.8451834665357323, "learning_rate": 1.4384796746650221e-06, "loss": 0.5841302871704102, "step": 4745 }, { "epoch": 1.6724229074889867, "grad_norm": 1.8665624096794386, "learning_rate": 1.4354704894119554e-06, "loss": 0.627534031867981, "step": 4746 }, { "epoch": 1.6727753303964756, "grad_norm": 1.9526983627618448, "learning_rate": 1.4324642115501165e-06, "loss": 0.6160094738006592, "step": 4747 }, { "epoch": 1.6731277533039648, "grad_norm": 1.6909104461316946, "learning_rate": 1.4294608421000489e-06, "loss": 0.5420609712600708, "step": 4748 }, { "epoch": 1.6734801762114537, "grad_norm": 1.9597720364889828, "learning_rate": 1.4264603820813006e-06, "loss": 0.7729714512825012, "step": 4749 }, { "epoch": 1.6738325991189429, "grad_norm": 1.8780592513411432, "learning_rate": 1.4234628325124345e-06, "loss": 0.6458747386932373, "step": 4750 }, { "epoch": 1.6741850220264318, "grad_norm": 1.717642350217617, "learning_rate": 1.4204681944110242e-06, "loss": 0.5250670313835144, "step": 4751 }, { "epoch": 1.6745374449339208, "grad_norm": 1.9945004637909651, "learning_rate": 1.4174764687936548e-06, "loss": 0.4985695779323578, "step": 4752 }, { "epoch": 1.6748898678414097, "grad_norm": 2.1058731387570253, "learning_rate": 1.4144876566759303e-06, "loss": 0.6401104927062988, "step": 4753 }, { "epoch": 1.6752422907488986, "grad_norm": 1.8242149794974472, "learning_rate": 1.4115017590724534e-06, "loss": 0.5991432666778564, "step": 4754 }, { "epoch": 1.6755947136563876, "grad_norm": 1.8977892116048576, "learning_rate": 1.4085187769968433e-06, "loss": 0.7787071466445923, "step": 4755 }, { "epoch": 1.6759471365638765, "grad_norm": 1.8915051082154768, "learning_rate": 1.4055387114617336e-06, "loss": 0.6977101564407349, "step": 4756 }, { "epoch": 1.6762995594713657, "grad_norm": 1.7182871374874729, "learning_rate": 1.4025615634787616e-06, "loss": 0.541371762752533, "step": 4757 }, { "epoch": 1.6766519823788546, "grad_norm": 1.8269477268502086, "learning_rate": 1.3995873340585765e-06, "loss": 0.5548759698867798, "step": 4758 }, { "epoch": 1.6770044052863438, "grad_norm": 1.947083457475871, "learning_rate": 1.3966160242108373e-06, "loss": 0.6022241115570068, "step": 4759 }, { "epoch": 1.6773568281938327, "grad_norm": 2.1542376603491946, "learning_rate": 1.3936476349442074e-06, "loss": 0.4965083599090576, "step": 4760 }, { "epoch": 1.6777092511013216, "grad_norm": 2.4193138120349227, "learning_rate": 1.3906821672663706e-06, "loss": 0.6712369918823242, "step": 4761 }, { "epoch": 1.6780616740088106, "grad_norm": 1.8527401573304754, "learning_rate": 1.3877196221840038e-06, "loss": 0.6236127614974976, "step": 4762 }, { "epoch": 1.6784140969162995, "grad_norm": 1.9836915293869917, "learning_rate": 1.3847600007027996e-06, "loss": 0.7144246101379395, "step": 4763 }, { "epoch": 1.6787665198237884, "grad_norm": 1.926900514093349, "learning_rate": 1.3818033038274602e-06, "loss": 0.650489091873169, "step": 4764 }, { "epoch": 1.6791189427312774, "grad_norm": 1.896615210676468, "learning_rate": 1.3788495325616912e-06, "loss": 0.6711791157722473, "step": 4765 }, { "epoch": 1.6794713656387665, "grad_norm": 1.6640253715487854, "learning_rate": 1.375898687908206e-06, "loss": 0.49629515409469604, "step": 4766 }, { "epoch": 1.6798237885462555, "grad_norm": 2.3032521123469727, "learning_rate": 1.372950770868724e-06, "loss": 0.5843443870544434, "step": 4767 }, { "epoch": 1.6801762114537446, "grad_norm": 1.7269921421841483, "learning_rate": 1.3700057824439694e-06, "loss": 0.6201150417327881, "step": 4768 }, { "epoch": 1.6805286343612336, "grad_norm": 2.2518096795033746, "learning_rate": 1.3670637236336815e-06, "loss": 0.6149473190307617, "step": 4769 }, { "epoch": 1.6808810572687225, "grad_norm": 1.8297389667337718, "learning_rate": 1.3641245954365868e-06, "loss": 0.476188987493515, "step": 4770 }, { "epoch": 1.6812334801762114, "grad_norm": 1.6086104459565809, "learning_rate": 1.361188398850436e-06, "loss": 0.4850924015045166, "step": 4771 }, { "epoch": 1.6815859030837004, "grad_norm": 2.8146145731538676, "learning_rate": 1.3582551348719741e-06, "loss": 0.6008634567260742, "step": 4772 }, { "epoch": 1.6819383259911893, "grad_norm": 1.6382847925926618, "learning_rate": 1.3553248044969525e-06, "loss": 0.5383377075195312, "step": 4773 }, { "epoch": 1.6822907488986785, "grad_norm": 1.966985389102481, "learning_rate": 1.3523974087201274e-06, "loss": 0.5711555480957031, "step": 4774 }, { "epoch": 1.6826431718061674, "grad_norm": 1.915810750390724, "learning_rate": 1.3494729485352586e-06, "loss": 0.5267810821533203, "step": 4775 }, { "epoch": 1.6829955947136563, "grad_norm": 1.968063769811982, "learning_rate": 1.3465514249351076e-06, "loss": 0.6203084588050842, "step": 4776 }, { "epoch": 1.6833480176211455, "grad_norm": 1.604432029465195, "learning_rate": 1.3436328389114473e-06, "loss": 0.46672314405441284, "step": 4777 }, { "epoch": 1.6837004405286344, "grad_norm": 2.175917964334397, "learning_rate": 1.3407171914550366e-06, "loss": 0.6375850439071655, "step": 4778 }, { "epoch": 1.6840528634361234, "grad_norm": 1.7467776544405884, "learning_rate": 1.337804483555656e-06, "loss": 0.6162304282188416, "step": 4779 }, { "epoch": 1.6844052863436123, "grad_norm": 2.0769560048267817, "learning_rate": 1.3348947162020741e-06, "loss": 0.7814363241195679, "step": 4780 }, { "epoch": 1.6847577092511012, "grad_norm": 1.4969648698838118, "learning_rate": 1.3319878903820682e-06, "loss": 0.47330981492996216, "step": 4781 }, { "epoch": 1.6851101321585902, "grad_norm": 2.184365435582337, "learning_rate": 1.3290840070824163e-06, "loss": 0.759529173374176, "step": 4782 }, { "epoch": 1.6854625550660793, "grad_norm": 2.039688209679945, "learning_rate": 1.326183067288893e-06, "loss": 0.7727639675140381, "step": 4783 }, { "epoch": 1.6858149779735683, "grad_norm": 1.9567066145193837, "learning_rate": 1.3232850719862789e-06, "loss": 0.6429058313369751, "step": 4784 }, { "epoch": 1.6861674008810574, "grad_norm": 2.394172291442893, "learning_rate": 1.3203900221583565e-06, "loss": 0.5895540714263916, "step": 4785 }, { "epoch": 1.6865198237885464, "grad_norm": 1.835232130498821, "learning_rate": 1.317497918787899e-06, "loss": 0.5410366058349609, "step": 4786 }, { "epoch": 1.6868722466960353, "grad_norm": 2.025742052316709, "learning_rate": 1.3146087628566894e-06, "loss": 0.5144281387329102, "step": 4787 }, { "epoch": 1.6872246696035242, "grad_norm": 2.3397936718705967, "learning_rate": 1.311722555345506e-06, "loss": 0.7025437355041504, "step": 4788 }, { "epoch": 1.6875770925110132, "grad_norm": 1.7878267984100036, "learning_rate": 1.3088392972341256e-06, "loss": 0.6000303030014038, "step": 4789 }, { "epoch": 1.6879295154185021, "grad_norm": 2.057293191164908, "learning_rate": 1.3059589895013248e-06, "loss": 0.5732883214950562, "step": 4790 }, { "epoch": 1.688281938325991, "grad_norm": 1.83266529432573, "learning_rate": 1.3030816331248785e-06, "loss": 0.5908600091934204, "step": 4791 }, { "epoch": 1.6886343612334802, "grad_norm": 1.7317933799112057, "learning_rate": 1.3002072290815571e-06, "loss": 0.5579310655593872, "step": 4792 }, { "epoch": 1.6889867841409691, "grad_norm": 1.9713002881427846, "learning_rate": 1.2973357783471385e-06, "loss": 0.6439195871353149, "step": 4793 }, { "epoch": 1.6893392070484583, "grad_norm": 2.1068548457431437, "learning_rate": 1.2944672818963822e-06, "loss": 0.6213329434394836, "step": 4794 }, { "epoch": 1.6896916299559472, "grad_norm": 2.271454462540911, "learning_rate": 1.291601740703059e-06, "loss": 0.5875385999679565, "step": 4795 }, { "epoch": 1.6900440528634362, "grad_norm": 1.8170542219049632, "learning_rate": 1.2887391557399287e-06, "loss": 0.7071372270584106, "step": 4796 }, { "epoch": 1.690396475770925, "grad_norm": 1.9467140791895143, "learning_rate": 1.2858795279787517e-06, "loss": 0.504144549369812, "step": 4797 }, { "epoch": 1.690748898678414, "grad_norm": 1.9410537548952953, "learning_rate": 1.2830228583902816e-06, "loss": 0.7081021070480347, "step": 4798 }, { "epoch": 1.691101321585903, "grad_norm": 1.679906270518348, "learning_rate": 1.2801691479442658e-06, "loss": 0.5843057632446289, "step": 4799 }, { "epoch": 1.691453744493392, "grad_norm": 2.366764710202919, "learning_rate": 1.2773183976094571e-06, "loss": 0.6437872052192688, "step": 4800 }, { "epoch": 1.691806167400881, "grad_norm": 2.0784896951955125, "learning_rate": 1.2744706083535907e-06, "loss": 0.6945379972457886, "step": 4801 }, { "epoch": 1.69215859030837, "grad_norm": 1.5919193491775196, "learning_rate": 1.2716257811434019e-06, "loss": 0.5827867984771729, "step": 4802 }, { "epoch": 1.6925110132158592, "grad_norm": 2.6733992431993228, "learning_rate": 1.2687839169446259e-06, "loss": 0.5298784971237183, "step": 4803 }, { "epoch": 1.692863436123348, "grad_norm": 2.069188268688187, "learning_rate": 1.2659450167219834e-06, "loss": 0.6885675191879272, "step": 4804 }, { "epoch": 1.693215859030837, "grad_norm": 1.8639055588125417, "learning_rate": 1.2631090814391945e-06, "loss": 0.6902546882629395, "step": 4805 }, { "epoch": 1.693568281938326, "grad_norm": 1.7529564321244235, "learning_rate": 1.2602761120589713e-06, "loss": 0.5972022414207458, "step": 4806 }, { "epoch": 1.693920704845815, "grad_norm": 1.676709469876984, "learning_rate": 1.2574461095430145e-06, "loss": 0.5527150630950928, "step": 4807 }, { "epoch": 1.6942731277533039, "grad_norm": 1.6073136228470446, "learning_rate": 1.2546190748520294e-06, "loss": 0.5898724794387817, "step": 4808 }, { "epoch": 1.6946255506607928, "grad_norm": 1.7947012597219074, "learning_rate": 1.2517950089457e-06, "loss": 0.7023364901542664, "step": 4809 }, { "epoch": 1.694977973568282, "grad_norm": 1.6620582383673839, "learning_rate": 1.2489739127827083e-06, "loss": 0.6016935706138611, "step": 4810 }, { "epoch": 1.6953303964757709, "grad_norm": 1.8540813823422424, "learning_rate": 1.246155787320733e-06, "loss": 0.5724658966064453, "step": 4811 }, { "epoch": 1.69568281938326, "grad_norm": 1.799661687922518, "learning_rate": 1.2433406335164389e-06, "loss": 0.5886126160621643, "step": 4812 }, { "epoch": 1.696035242290749, "grad_norm": 1.7467523658249466, "learning_rate": 1.2405284523254823e-06, "loss": 0.6656844615936279, "step": 4813 }, { "epoch": 1.696387665198238, "grad_norm": 1.9261091270854245, "learning_rate": 1.237719244702511e-06, "loss": 0.6208533644676208, "step": 4814 }, { "epoch": 1.6967400881057269, "grad_norm": 2.00988865324314, "learning_rate": 1.234913011601162e-06, "loss": 0.6895248889923096, "step": 4815 }, { "epoch": 1.6970925110132158, "grad_norm": 1.755568469718746, "learning_rate": 1.23210975397407e-06, "loss": 0.5525833368301392, "step": 4816 }, { "epoch": 1.6974449339207047, "grad_norm": 1.9922303788563613, "learning_rate": 1.2293094727728471e-06, "loss": 0.5958225727081299, "step": 4817 }, { "epoch": 1.6977973568281939, "grad_norm": 1.8538893137799548, "learning_rate": 1.226512168948103e-06, "loss": 0.7570905089378357, "step": 4818 }, { "epoch": 1.6981497797356828, "grad_norm": 1.9122257264522353, "learning_rate": 1.2237178434494379e-06, "loss": 0.586568295955658, "step": 4819 }, { "epoch": 1.6985022026431718, "grad_norm": 2.0787640518450257, "learning_rate": 1.2209264972254365e-06, "loss": 0.574797511100769, "step": 4820 }, { "epoch": 1.698854625550661, "grad_norm": 1.8487761386635184, "learning_rate": 1.2181381312236751e-06, "loss": 0.6277909278869629, "step": 4821 }, { "epoch": 1.6992070484581498, "grad_norm": 1.6733284685909569, "learning_rate": 1.2153527463907155e-06, "loss": 0.6308181285858154, "step": 4822 }, { "epoch": 1.6995594713656388, "grad_norm": 1.493809970809451, "learning_rate": 1.2125703436721091e-06, "loss": 0.5076426267623901, "step": 4823 }, { "epoch": 1.6999118942731277, "grad_norm": 1.9773484387601714, "learning_rate": 1.2097909240123994e-06, "loss": 0.7712167501449585, "step": 4824 }, { "epoch": 1.7002643171806167, "grad_norm": 2.0486043522302517, "learning_rate": 1.2070144883551072e-06, "loss": 0.5964892506599426, "step": 4825 }, { "epoch": 1.7006167400881056, "grad_norm": 2.112366587786807, "learning_rate": 1.2042410376427472e-06, "loss": 0.7473628520965576, "step": 4826 }, { "epoch": 1.7009691629955948, "grad_norm": 2.17212484756118, "learning_rate": 1.2014705728168219e-06, "loss": 0.6897937059402466, "step": 4827 }, { "epoch": 1.7013215859030837, "grad_norm": 1.734585395626036, "learning_rate": 1.198703094817817e-06, "loss": 0.5955557823181152, "step": 4828 }, { "epoch": 1.7016740088105728, "grad_norm": 1.8689895875293312, "learning_rate": 1.195938604585205e-06, "loss": 0.7051092386245728, "step": 4829 }, { "epoch": 1.7020264317180618, "grad_norm": 3.0972606123503836, "learning_rate": 1.1931771030574446e-06, "loss": 0.584572434425354, "step": 4830 }, { "epoch": 1.7023788546255507, "grad_norm": 1.9375690474214398, "learning_rate": 1.1904185911719768e-06, "loss": 0.5691804885864258, "step": 4831 }, { "epoch": 1.7027312775330397, "grad_norm": 2.088088383810667, "learning_rate": 1.187663069865237e-06, "loss": 0.5539572834968567, "step": 4832 }, { "epoch": 1.7030837004405286, "grad_norm": 2.0666580745541956, "learning_rate": 1.1849105400726324e-06, "loss": 0.6229352951049805, "step": 4833 }, { "epoch": 1.7034361233480175, "grad_norm": 1.9668453557048677, "learning_rate": 1.1821610027285613e-06, "loss": 0.7302919626235962, "step": 4834 }, { "epoch": 1.7037885462555065, "grad_norm": 1.6339139133298357, "learning_rate": 1.1794144587664113e-06, "loss": 0.6802065372467041, "step": 4835 }, { "epoch": 1.7041409691629956, "grad_norm": 1.8504754714684983, "learning_rate": 1.1766709091185447e-06, "loss": 0.6448635458946228, "step": 4836 }, { "epoch": 1.7044933920704846, "grad_norm": 1.9824663488252348, "learning_rate": 1.1739303547163138e-06, "loss": 0.6141834259033203, "step": 4837 }, { "epoch": 1.7048458149779737, "grad_norm": 1.8134017158002862, "learning_rate": 1.1711927964900482e-06, "loss": 0.5634737014770508, "step": 4838 }, { "epoch": 1.7051982378854627, "grad_norm": 1.7474752293775022, "learning_rate": 1.1684582353690642e-06, "loss": 0.6786668300628662, "step": 4839 }, { "epoch": 1.7055506607929516, "grad_norm": 1.8867560516103576, "learning_rate": 1.1657266722816663e-06, "loss": 0.6117962002754211, "step": 4840 }, { "epoch": 1.7059030837004405, "grad_norm": 1.8617179395667027, "learning_rate": 1.1629981081551278e-06, "loss": 0.6115782260894775, "step": 4841 }, { "epoch": 1.7062555066079295, "grad_norm": 1.676805324865966, "learning_rate": 1.1602725439157114e-06, "loss": 0.6526266932487488, "step": 4842 }, { "epoch": 1.7066079295154184, "grad_norm": 1.8958276581556894, "learning_rate": 1.1575499804886658e-06, "loss": 0.5449249148368835, "step": 4843 }, { "epoch": 1.7069603524229073, "grad_norm": 1.7291534323462554, "learning_rate": 1.1548304187982152e-06, "loss": 0.5793930292129517, "step": 4844 }, { "epoch": 1.7073127753303965, "grad_norm": 1.9655642851245159, "learning_rate": 1.152113859767565e-06, "loss": 0.5133150815963745, "step": 4845 }, { "epoch": 1.7076651982378854, "grad_norm": 2.3523116804029973, "learning_rate": 1.1494003043189028e-06, "loss": 0.6771460771560669, "step": 4846 }, { "epoch": 1.7080176211453746, "grad_norm": 1.651478684492262, "learning_rate": 1.1466897533733945e-06, "loss": 0.5680071115493774, "step": 4847 }, { "epoch": 1.7083700440528635, "grad_norm": 1.5749772965105935, "learning_rate": 1.1439822078511941e-06, "loss": 0.58878493309021, "step": 4848 }, { "epoch": 1.7087224669603525, "grad_norm": 2.108145787297885, "learning_rate": 1.14127766867142e-06, "loss": 0.6441792249679565, "step": 4849 }, { "epoch": 1.7090748898678414, "grad_norm": 1.98601005576605, "learning_rate": 1.1385761367521865e-06, "loss": 0.4910963773727417, "step": 4850 }, { "epoch": 1.7094273127753303, "grad_norm": 1.860202988226145, "learning_rate": 1.1358776130105765e-06, "loss": 0.5878154635429382, "step": 4851 }, { "epoch": 1.7097797356828193, "grad_norm": 1.8203129566458394, "learning_rate": 1.133182098362654e-06, "loss": 0.5835394859313965, "step": 4852 }, { "epoch": 1.7101321585903082, "grad_norm": 2.552503327854629, "learning_rate": 1.130489593723465e-06, "loss": 0.6612577438354492, "step": 4853 }, { "epoch": 1.7104845814977974, "grad_norm": 1.6914248252090902, "learning_rate": 1.1278001000070282e-06, "loss": 0.5892096757888794, "step": 4854 }, { "epoch": 1.7108370044052863, "grad_norm": 1.597330243290686, "learning_rate": 1.1251136181263433e-06, "loss": 0.6196271181106567, "step": 4855 }, { "epoch": 1.7111894273127755, "grad_norm": 1.7525774926334832, "learning_rate": 1.122430148993392e-06, "loss": 0.5365586280822754, "step": 4856 }, { "epoch": 1.7115418502202644, "grad_norm": 1.8547440453110882, "learning_rate": 1.119749693519121e-06, "loss": 0.6006353497505188, "step": 4857 }, { "epoch": 1.7118942731277533, "grad_norm": 1.7445383281871432, "learning_rate": 1.117072252613467e-06, "loss": 0.5645362138748169, "step": 4858 }, { "epoch": 1.7122466960352423, "grad_norm": 1.928827114023792, "learning_rate": 1.1143978271853362e-06, "loss": 0.46408799290657043, "step": 4859 }, { "epoch": 1.7125991189427312, "grad_norm": 1.9357772553695842, "learning_rate": 1.1117264181426134e-06, "loss": 0.7798513770103455, "step": 4860 }, { "epoch": 1.7129515418502201, "grad_norm": 2.234058868113385, "learning_rate": 1.109058026392158e-06, "loss": 0.739770770072937, "step": 4861 }, { "epoch": 1.7133039647577093, "grad_norm": 1.8311645792398603, "learning_rate": 1.1063926528398062e-06, "loss": 0.567306637763977, "step": 4862 }, { "epoch": 1.7136563876651982, "grad_norm": 1.8983872649225184, "learning_rate": 1.1037302983903686e-06, "loss": 0.5730962753295898, "step": 4863 }, { "epoch": 1.7140088105726872, "grad_norm": 2.0428299761075186, "learning_rate": 1.1010709639476335e-06, "loss": 0.6311475038528442, "step": 4864 }, { "epoch": 1.7143612334801763, "grad_norm": 2.074080543967214, "learning_rate": 1.098414650414359e-06, "loss": 0.5867577791213989, "step": 4865 }, { "epoch": 1.7147136563876653, "grad_norm": 1.7945849101921227, "learning_rate": 1.0957613586922844e-06, "loss": 0.6291393637657166, "step": 4866 }, { "epoch": 1.7150660792951542, "grad_norm": 2.221825931925125, "learning_rate": 1.0931110896821184e-06, "loss": 0.5811575651168823, "step": 4867 }, { "epoch": 1.7154185022026431, "grad_norm": 1.8041589779612486, "learning_rate": 1.0904638442835459e-06, "loss": 0.6340835690498352, "step": 4868 }, { "epoch": 1.715770925110132, "grad_norm": 2.1324283591729696, "learning_rate": 1.087819623395222e-06, "loss": 0.6543419361114502, "step": 4869 }, { "epoch": 1.716123348017621, "grad_norm": 1.7815282855404584, "learning_rate": 1.0851784279147793e-06, "loss": 0.5669729709625244, "step": 4870 }, { "epoch": 1.7164757709251102, "grad_norm": 1.7880383242870224, "learning_rate": 1.08254025873882e-06, "loss": 0.5422554612159729, "step": 4871 }, { "epoch": 1.716828193832599, "grad_norm": 2.1378220532284646, "learning_rate": 1.0799051167629215e-06, "loss": 0.6154215335845947, "step": 4872 }, { "epoch": 1.7171806167400883, "grad_norm": 1.6926103915620132, "learning_rate": 1.0772730028816304e-06, "loss": 0.6306319236755371, "step": 4873 }, { "epoch": 1.7175330396475772, "grad_norm": 1.8857595594505687, "learning_rate": 1.0746439179884716e-06, "loss": 0.6301003694534302, "step": 4874 }, { "epoch": 1.7178854625550661, "grad_norm": 2.02854056964172, "learning_rate": 1.0720178629759347e-06, "loss": 0.5730071067810059, "step": 4875 }, { "epoch": 1.718237885462555, "grad_norm": 1.911878436689674, "learning_rate": 1.0693948387354836e-06, "loss": 0.5330506563186646, "step": 4876 }, { "epoch": 1.718590308370044, "grad_norm": 2.2472032788534033, "learning_rate": 1.0667748461575544e-06, "loss": 0.7724611759185791, "step": 4877 }, { "epoch": 1.718942731277533, "grad_norm": 1.8072854503281317, "learning_rate": 1.0641578861315517e-06, "loss": 0.5415126085281372, "step": 4878 }, { "epoch": 1.7192951541850219, "grad_norm": 1.7491717586336433, "learning_rate": 1.0615439595458554e-06, "loss": 0.4895828664302826, "step": 4879 }, { "epoch": 1.719647577092511, "grad_norm": 2.1761761181791757, "learning_rate": 1.0589330672878084e-06, "loss": 0.6049074530601501, "step": 4880 }, { "epoch": 1.72, "grad_norm": 1.8120989551683504, "learning_rate": 1.056325210243726e-06, "loss": 0.5733378529548645, "step": 4881 }, { "epoch": 1.7203524229074891, "grad_norm": 1.842989042937231, "learning_rate": 1.0537203892989e-06, "loss": 0.6034674644470215, "step": 4882 }, { "epoch": 1.720704845814978, "grad_norm": 1.873470428701205, "learning_rate": 1.0511186053375833e-06, "loss": 0.5282200574874878, "step": 4883 }, { "epoch": 1.721057268722467, "grad_norm": 1.7879116674889537, "learning_rate": 1.0485198592430001e-06, "loss": 0.6331876516342163, "step": 4884 }, { "epoch": 1.721409691629956, "grad_norm": 1.9365084560803385, "learning_rate": 1.045924151897344e-06, "loss": 0.5194844007492065, "step": 4885 }, { "epoch": 1.7217621145374449, "grad_norm": 1.9087945355709668, "learning_rate": 1.0433314841817755e-06, "loss": 0.5496135354042053, "step": 4886 }, { "epoch": 1.7221145374449338, "grad_norm": 2.009884434911672, "learning_rate": 1.0407418569764304e-06, "loss": 0.7871953248977661, "step": 4887 }, { "epoch": 1.7224669603524227, "grad_norm": 2.040889138785673, "learning_rate": 1.0381552711604004e-06, "loss": 0.7199628353118896, "step": 4888 }, { "epoch": 1.722819383259912, "grad_norm": 2.248999014584043, "learning_rate": 1.0355717276117506e-06, "loss": 0.5876469612121582, "step": 4889 }, { "epoch": 1.7231718061674008, "grad_norm": 2.0349261607011, "learning_rate": 1.0329912272075181e-06, "loss": 0.5543426275253296, "step": 4890 }, { "epoch": 1.72352422907489, "grad_norm": 1.583533546685778, "learning_rate": 1.0304137708236994e-06, "loss": 0.5118540525436401, "step": 4891 }, { "epoch": 1.723876651982379, "grad_norm": 1.82424017683773, "learning_rate": 1.0278393593352631e-06, "loss": 0.628477931022644, "step": 4892 }, { "epoch": 1.7242290748898679, "grad_norm": 1.9109773060364437, "learning_rate": 1.0252679936161392e-06, "loss": 0.6119322776794434, "step": 4893 }, { "epoch": 1.7245814977973568, "grad_norm": 1.8450217827392812, "learning_rate": 1.0226996745392259e-06, "loss": 0.7661763429641724, "step": 4894 }, { "epoch": 1.7249339207048457, "grad_norm": 2.1201139928861394, "learning_rate": 1.0201344029763927e-06, "loss": 0.6431440114974976, "step": 4895 }, { "epoch": 1.7252863436123347, "grad_norm": 2.0387248477928503, "learning_rate": 1.0175721797984639e-06, "loss": 0.7295387983322144, "step": 4896 }, { "epoch": 1.7256387665198238, "grad_norm": 1.9561833203401287, "learning_rate": 1.015013005875235e-06, "loss": 0.58225017786026, "step": 4897 }, { "epoch": 1.7259911894273128, "grad_norm": 1.9211243008184207, "learning_rate": 1.0124568820754689e-06, "loss": 0.5467473864555359, "step": 4898 }, { "epoch": 1.7263436123348017, "grad_norm": 2.2453442964094967, "learning_rate": 1.00990380926689e-06, "loss": 0.7637814283370972, "step": 4899 }, { "epoch": 1.7266960352422909, "grad_norm": 2.13267606796778, "learning_rate": 1.0073537883161821e-06, "loss": 0.5354464650154114, "step": 4900 }, { "epoch": 1.7270484581497798, "grad_norm": 1.91187833906973, "learning_rate": 1.0048068200890037e-06, "loss": 0.5213606357574463, "step": 4901 }, { "epoch": 1.7274008810572687, "grad_norm": 1.8770841550484265, "learning_rate": 1.0022629054499678e-06, "loss": 0.6073330640792847, "step": 4902 }, { "epoch": 1.7277533039647577, "grad_norm": 2.1663053459498283, "learning_rate": 9.997220452626587e-07, "loss": 0.5711998343467712, "step": 4903 }, { "epoch": 1.7281057268722466, "grad_norm": 1.8823259072141711, "learning_rate": 9.971842403896137e-07, "loss": 0.6824701428413391, "step": 4904 }, { "epoch": 1.7284581497797356, "grad_norm": 1.844862593672041, "learning_rate": 9.9464949169234e-07, "loss": 0.528059184551239, "step": 4905 }, { "epoch": 1.7288105726872247, "grad_norm": 1.7519423160504919, "learning_rate": 9.92117800031308e-07, "loss": 0.45617133378982544, "step": 4906 }, { "epoch": 1.7291629955947136, "grad_norm": 2.095891000231315, "learning_rate": 9.895891662659485e-07, "loss": 0.6186379194259644, "step": 4907 }, { "epoch": 1.7295154185022028, "grad_norm": 1.8933361504308706, "learning_rate": 9.870635912546511e-07, "loss": 0.622776985168457, "step": 4908 }, { "epoch": 1.7298678414096917, "grad_norm": 2.1556634846751073, "learning_rate": 9.845410758547724e-07, "loss": 0.6322426199913025, "step": 4909 }, { "epoch": 1.7302202643171807, "grad_norm": 1.8637079254212523, "learning_rate": 9.82021620922624e-07, "loss": 0.565685510635376, "step": 4910 }, { "epoch": 1.7305726872246696, "grad_norm": 1.9032887733300228, "learning_rate": 9.795052273134908e-07, "loss": 0.670723557472229, "step": 4911 }, { "epoch": 1.7309251101321586, "grad_norm": 1.818317953069921, "learning_rate": 9.769918958816017e-07, "loss": 0.627914309501648, "step": 4912 }, { "epoch": 1.7312775330396475, "grad_norm": 1.8142433277320784, "learning_rate": 9.74481627480156e-07, "loss": 0.613754391670227, "step": 4913 }, { "epoch": 1.7316299559471364, "grad_norm": 1.6146673255290158, "learning_rate": 9.719744229613148e-07, "loss": 0.7128336429595947, "step": 4914 }, { "epoch": 1.7319823788546256, "grad_norm": 2.07516307915708, "learning_rate": 9.694702831761937e-07, "loss": 0.692448079586029, "step": 4915 }, { "epoch": 1.7323348017621145, "grad_norm": 1.8379288210737326, "learning_rate": 9.669692089748717e-07, "loss": 0.5722585916519165, "step": 4916 }, { "epoch": 1.7326872246696037, "grad_norm": 2.880722779651987, "learning_rate": 9.64471201206385e-07, "loss": 0.5267904996871948, "step": 4917 }, { "epoch": 1.7330396475770926, "grad_norm": 1.8098448963152955, "learning_rate": 9.619762607187277e-07, "loss": 0.6290950179100037, "step": 4918 }, { "epoch": 1.7333920704845815, "grad_norm": 1.6991585212089806, "learning_rate": 9.594843883588588e-07, "loss": 0.5137144327163696, "step": 4919 }, { "epoch": 1.7337444933920705, "grad_norm": 2.0101083451482067, "learning_rate": 9.569955849726875e-07, "loss": 0.6110765337944031, "step": 4920 }, { "epoch": 1.7340969162995594, "grad_norm": 1.805820390142787, "learning_rate": 9.545098514050844e-07, "loss": 0.5097514390945435, "step": 4921 }, { "epoch": 1.7344493392070484, "grad_norm": 1.9204009410934093, "learning_rate": 9.520271884998822e-07, "loss": 0.7220968008041382, "step": 4922 }, { "epoch": 1.7348017621145373, "grad_norm": 1.816061125504689, "learning_rate": 9.495475970998669e-07, "loss": 0.4790550470352173, "step": 4923 }, { "epoch": 1.7351541850220265, "grad_norm": 1.8878679441443287, "learning_rate": 9.470710780467818e-07, "loss": 0.5440540909767151, "step": 4924 }, { "epoch": 1.7355066079295154, "grad_norm": 1.8420075371513611, "learning_rate": 9.445976321813277e-07, "loss": 0.6351054310798645, "step": 4925 }, { "epoch": 1.7358590308370045, "grad_norm": 1.8685391189030902, "learning_rate": 9.421272603431619e-07, "loss": 0.597430944442749, "step": 4926 }, { "epoch": 1.7362114537444935, "grad_norm": 1.8993591697635552, "learning_rate": 9.396599633709013e-07, "loss": 0.5826110243797302, "step": 4927 }, { "epoch": 1.7365638766519824, "grad_norm": 1.9528322527669026, "learning_rate": 9.371957421021116e-07, "loss": 0.61531662940979, "step": 4928 }, { "epoch": 1.7369162995594714, "grad_norm": 1.7976479809998938, "learning_rate": 9.347345973733257e-07, "loss": 0.5286549925804138, "step": 4929 }, { "epoch": 1.7372687224669603, "grad_norm": 2.051327926584316, "learning_rate": 9.322765300200209e-07, "loss": 0.6923980712890625, "step": 4930 }, { "epoch": 1.7376211453744492, "grad_norm": 1.8765754964403032, "learning_rate": 9.298215408766376e-07, "loss": 0.5408697128295898, "step": 4931 }, { "epoch": 1.7379735682819382, "grad_norm": 1.9428832757254997, "learning_rate": 9.273696307765656e-07, "loss": 0.6360228061676025, "step": 4932 }, { "epoch": 1.7383259911894273, "grad_norm": 1.5478222777536266, "learning_rate": 9.249208005521538e-07, "loss": 0.46559634804725647, "step": 4933 }, { "epoch": 1.7386784140969163, "grad_norm": 2.0814940983294465, "learning_rate": 9.224750510347036e-07, "loss": 0.6065478324890137, "step": 4934 }, { "epoch": 1.7390308370044054, "grad_norm": 2.197942688439507, "learning_rate": 9.2003238305447e-07, "loss": 0.6777745485305786, "step": 4935 }, { "epoch": 1.7393832599118944, "grad_norm": 1.764242470379209, "learning_rate": 9.175927974406607e-07, "loss": 0.568982720375061, "step": 4936 }, { "epoch": 1.7397356828193833, "grad_norm": 1.9082270198240563, "learning_rate": 9.151562950214443e-07, "loss": 0.6014461517333984, "step": 4937 }, { "epoch": 1.7400881057268722, "grad_norm": 1.9463215063568118, "learning_rate": 9.127228766239349e-07, "loss": 0.6312133073806763, "step": 4938 }, { "epoch": 1.7404405286343612, "grad_norm": 1.9066118382891128, "learning_rate": 9.102925430742015e-07, "loss": 0.5440298318862915, "step": 4939 }, { "epoch": 1.74079295154185, "grad_norm": 1.9115402376997355, "learning_rate": 9.078652951972688e-07, "loss": 0.6599005460739136, "step": 4940 }, { "epoch": 1.7411453744493393, "grad_norm": 1.8987879122247575, "learning_rate": 9.054411338171099e-07, "loss": 0.6719228625297546, "step": 4941 }, { "epoch": 1.7414977973568282, "grad_norm": 1.7692389966879711, "learning_rate": 9.030200597566529e-07, "loss": 0.5771356821060181, "step": 4942 }, { "epoch": 1.7418502202643171, "grad_norm": 2.0029197465912936, "learning_rate": 9.006020738377764e-07, "loss": 0.5066591501235962, "step": 4943 }, { "epoch": 1.7422026431718063, "grad_norm": 1.754361693598564, "learning_rate": 8.981871768813111e-07, "loss": 0.5091663002967834, "step": 4944 }, { "epoch": 1.7425550660792952, "grad_norm": 1.9092674317256029, "learning_rate": 8.957753697070415e-07, "loss": 0.6594514846801758, "step": 4945 }, { "epoch": 1.7429074889867842, "grad_norm": 1.8033652679865708, "learning_rate": 8.933666531337004e-07, "loss": 0.5485379695892334, "step": 4946 }, { "epoch": 1.743259911894273, "grad_norm": 2.2602019905537913, "learning_rate": 8.909610279789716e-07, "loss": 0.6079416871070862, "step": 4947 }, { "epoch": 1.743612334801762, "grad_norm": 1.8415960205262154, "learning_rate": 8.885584950594894e-07, "loss": 0.4980606436729431, "step": 4948 }, { "epoch": 1.743964757709251, "grad_norm": 1.5880176897451332, "learning_rate": 8.861590551908405e-07, "loss": 0.47701022028923035, "step": 4949 }, { "epoch": 1.7443171806167401, "grad_norm": 1.7223149872435417, "learning_rate": 8.837627091875578e-07, "loss": 0.5041281580924988, "step": 4950 }, { "epoch": 1.744669603524229, "grad_norm": 1.9666236461253934, "learning_rate": 8.813694578631283e-07, "loss": 0.5477255582809448, "step": 4951 }, { "epoch": 1.7450220264317182, "grad_norm": 1.883766477051188, "learning_rate": 8.78979302029983e-07, "loss": 0.6377973556518555, "step": 4952 }, { "epoch": 1.7453744493392072, "grad_norm": 1.940207867324299, "learning_rate": 8.76592242499511e-07, "loss": 0.6688166856765747, "step": 4953 }, { "epoch": 1.745726872246696, "grad_norm": 2.0031898505950907, "learning_rate": 8.742082800820406e-07, "loss": 0.6236848831176758, "step": 4954 }, { "epoch": 1.746079295154185, "grad_norm": 1.7582600318717108, "learning_rate": 8.718274155868545e-07, "loss": 0.653768002986908, "step": 4955 }, { "epoch": 1.746431718061674, "grad_norm": 1.844534933556578, "learning_rate": 8.694496498221805e-07, "loss": 0.5647604465484619, "step": 4956 }, { "epoch": 1.746784140969163, "grad_norm": 1.781932697931349, "learning_rate": 8.670749835951964e-07, "loss": 0.4960663914680481, "step": 4957 }, { "epoch": 1.7471365638766518, "grad_norm": 1.6873484879529697, "learning_rate": 8.647034177120317e-07, "loss": 0.6271536350250244, "step": 4958 }, { "epoch": 1.747488986784141, "grad_norm": 2.0059254125224757, "learning_rate": 8.623349529777525e-07, "loss": 0.6323459148406982, "step": 4959 }, { "epoch": 1.74784140969163, "grad_norm": 1.9564636362517054, "learning_rate": 8.599695901963811e-07, "loss": 0.6084197163581848, "step": 4960 }, { "epoch": 1.748193832599119, "grad_norm": 1.8913653459936526, "learning_rate": 8.576073301708876e-07, "loss": 0.48974379897117615, "step": 4961 }, { "epoch": 1.748546255506608, "grad_norm": 1.8735173678444992, "learning_rate": 8.552481737031859e-07, "loss": 0.5985081195831299, "step": 4962 }, { "epoch": 1.748898678414097, "grad_norm": 1.6360789306706147, "learning_rate": 8.528921215941299e-07, "loss": 0.507872998714447, "step": 4963 }, { "epoch": 1.749251101321586, "grad_norm": 1.5251403239052872, "learning_rate": 8.50539174643531e-07, "loss": 0.5772356986999512, "step": 4964 }, { "epoch": 1.7496035242290748, "grad_norm": 2.222117569410965, "learning_rate": 8.48189333650139e-07, "loss": 0.675100564956665, "step": 4965 }, { "epoch": 1.7499559471365638, "grad_norm": 1.9356078104678653, "learning_rate": 8.458425994116582e-07, "loss": 0.5571645498275757, "step": 4966 }, { "epoch": 1.7503083700440527, "grad_norm": 1.807660183683072, "learning_rate": 8.434989727247233e-07, "loss": 0.5842185020446777, "step": 4967 }, { "epoch": 1.7506607929515419, "grad_norm": 1.7960899956397995, "learning_rate": 8.41158454384925e-07, "loss": 0.5693016648292542, "step": 4968 }, { "epoch": 1.7510132158590308, "grad_norm": 1.808037504366546, "learning_rate": 8.388210451868006e-07, "loss": 0.5791449546813965, "step": 4969 }, { "epoch": 1.75136563876652, "grad_norm": 2.1439820497437516, "learning_rate": 8.364867459238257e-07, "loss": 0.4873960018157959, "step": 4970 }, { "epoch": 1.751718061674009, "grad_norm": 1.6712365329059415, "learning_rate": 8.341555573884175e-07, "loss": 0.609403669834137, "step": 4971 }, { "epoch": 1.7520704845814978, "grad_norm": 2.0664225342752327, "learning_rate": 8.318274803719483e-07, "loss": 0.5676242113113403, "step": 4972 }, { "epoch": 1.7524229074889868, "grad_norm": 2.2550971825464026, "learning_rate": 8.29502515664723e-07, "loss": 0.7692728638648987, "step": 4973 }, { "epoch": 1.7527753303964757, "grad_norm": 2.318073308236361, "learning_rate": 8.27180664056001e-07, "loss": 0.7940253019332886, "step": 4974 }, { "epoch": 1.7531277533039646, "grad_norm": 2.021077548315, "learning_rate": 8.24861926333973e-07, "loss": 0.5784735083580017, "step": 4975 }, { "epoch": 1.7534801762114536, "grad_norm": 2.106016882372918, "learning_rate": 8.225463032857783e-07, "loss": 0.6493539810180664, "step": 4976 }, { "epoch": 1.7538325991189427, "grad_norm": 1.6893816606485224, "learning_rate": 8.202337956975026e-07, "loss": 0.615519106388092, "step": 4977 }, { "epoch": 1.7541850220264317, "grad_norm": 2.4337358559529587, "learning_rate": 8.179244043541678e-07, "loss": 0.5369104146957397, "step": 4978 }, { "epoch": 1.7545374449339208, "grad_norm": 1.8845170170566812, "learning_rate": 8.156181300397414e-07, "loss": 0.5527158975601196, "step": 4979 }, { "epoch": 1.7548898678414098, "grad_norm": 2.1597753145956786, "learning_rate": 8.133149735371316e-07, "loss": 0.5870147943496704, "step": 4980 }, { "epoch": 1.7552422907488987, "grad_norm": 2.0333589118991497, "learning_rate": 8.110149356281848e-07, "loss": 0.7235025763511658, "step": 4981 }, { "epoch": 1.7555947136563876, "grad_norm": 1.9283097758260628, "learning_rate": 8.087180170937004e-07, "loss": 0.5630521774291992, "step": 4982 }, { "epoch": 1.7559471365638766, "grad_norm": 2.015740627515862, "learning_rate": 8.06424218713403e-07, "loss": 0.5005021691322327, "step": 4983 }, { "epoch": 1.7562995594713655, "grad_norm": 2.0683486617790066, "learning_rate": 8.041335412659679e-07, "loss": 0.7267229557037354, "step": 4984 }, { "epoch": 1.7566519823788547, "grad_norm": 2.2397406108409834, "learning_rate": 8.018459855290107e-07, "loss": 0.6494802236557007, "step": 4985 }, { "epoch": 1.7570044052863436, "grad_norm": 1.8012009390187627, "learning_rate": 7.995615522790845e-07, "loss": 0.5637267827987671, "step": 4986 }, { "epoch": 1.7573568281938328, "grad_norm": 1.807872858711751, "learning_rate": 7.972802422916826e-07, "loss": 0.5143958330154419, "step": 4987 }, { "epoch": 1.7577092511013217, "grad_norm": 1.7925007157989583, "learning_rate": 7.950020563412398e-07, "loss": 0.607841968536377, "step": 4988 }, { "epoch": 1.7580616740088106, "grad_norm": 1.9011698158798267, "learning_rate": 7.927269952011285e-07, "loss": 0.6066895723342896, "step": 4989 }, { "epoch": 1.7584140969162996, "grad_norm": 2.293924542695718, "learning_rate": 7.904550596436611e-07, "loss": 0.6686232686042786, "step": 4990 }, { "epoch": 1.7587665198237885, "grad_norm": 1.7540251789370713, "learning_rate": 7.881862504400884e-07, "loss": 0.589708685874939, "step": 4991 }, { "epoch": 1.7591189427312774, "grad_norm": 1.9346002211307631, "learning_rate": 7.859205683606008e-07, "loss": 0.7008450031280518, "step": 4992 }, { "epoch": 1.7594713656387664, "grad_norm": 1.5488386957340947, "learning_rate": 7.836580141743289e-07, "loss": 0.5754648447036743, "step": 4993 }, { "epoch": 1.7598237885462555, "grad_norm": 1.8204543329281522, "learning_rate": 7.81398588649338e-07, "loss": 0.5756049156188965, "step": 4994 }, { "epoch": 1.7601762114537445, "grad_norm": 1.8754803653843481, "learning_rate": 7.791422925526326e-07, "loss": 0.6143715381622314, "step": 4995 }, { "epoch": 1.7605286343612336, "grad_norm": 1.9795958910244131, "learning_rate": 7.768891266501544e-07, "loss": 0.700069010257721, "step": 4996 }, { "epoch": 1.7608810572687226, "grad_norm": 1.8030282940418303, "learning_rate": 7.746390917067847e-07, "loss": 0.5200002193450928, "step": 4997 }, { "epoch": 1.7612334801762115, "grad_norm": 2.0811179040330483, "learning_rate": 7.723921884863395e-07, "loss": 0.6963525414466858, "step": 4998 }, { "epoch": 1.7615859030837004, "grad_norm": 1.9255908471526815, "learning_rate": 7.701484177515717e-07, "loss": 0.6329556703567505, "step": 4999 }, { "epoch": 1.7619383259911894, "grad_norm": 2.0796773022688213, "learning_rate": 7.67907780264171e-07, "loss": 0.6980677247047424, "step": 5000 }, { "epoch": 1.7622907488986783, "grad_norm": 1.95091452058077, "learning_rate": 7.656702767847679e-07, "loss": 0.5244314670562744, "step": 5001 }, { "epoch": 1.7626431718061673, "grad_norm": 1.937585844549177, "learning_rate": 7.634359080729215e-07, "loss": 0.6679523587226868, "step": 5002 }, { "epoch": 1.7629955947136564, "grad_norm": 1.7698344536731299, "learning_rate": 7.612046748871327e-07, "loss": 0.6168316602706909, "step": 5003 }, { "epoch": 1.7633480176211453, "grad_norm": 1.8295319189191592, "learning_rate": 7.589765779848346e-07, "loss": 0.5892738699913025, "step": 5004 }, { "epoch": 1.7637004405286345, "grad_norm": 1.8270406797726577, "learning_rate": 7.567516181223966e-07, "loss": 0.6714082956314087, "step": 5005 }, { "epoch": 1.7640528634361234, "grad_norm": 1.7798086214061835, "learning_rate": 7.545297960551245e-07, "loss": 0.6327016353607178, "step": 5006 }, { "epoch": 1.7644052863436124, "grad_norm": 1.8272907155681217, "learning_rate": 7.52311112537254e-07, "loss": 0.5114126205444336, "step": 5007 }, { "epoch": 1.7647577092511013, "grad_norm": 1.9198067827489789, "learning_rate": 7.500955683219646e-07, "loss": 0.5701695084571838, "step": 5008 }, { "epoch": 1.7651101321585903, "grad_norm": 1.7304483866926885, "learning_rate": 7.478831641613616e-07, "loss": 0.5966283082962036, "step": 5009 }, { "epoch": 1.7654625550660792, "grad_norm": 1.7690414353003558, "learning_rate": 7.456739008064883e-07, "loss": 0.6219101548194885, "step": 5010 }, { "epoch": 1.7658149779735681, "grad_norm": 2.1971226449232804, "learning_rate": 7.434677790073197e-07, "loss": 0.6516324877738953, "step": 5011 }, { "epoch": 1.7661674008810573, "grad_norm": 2.0945250680543395, "learning_rate": 7.412647995127664e-07, "loss": 0.4623621106147766, "step": 5012 }, { "epoch": 1.7665198237885462, "grad_norm": 1.7568345992089816, "learning_rate": 7.390649630706703e-07, "loss": 0.5661109685897827, "step": 5013 }, { "epoch": 1.7668722466960354, "grad_norm": 2.0070117088967154, "learning_rate": 7.368682704278096e-07, "loss": 0.47063148021698, "step": 5014 }, { "epoch": 1.7672246696035243, "grad_norm": 1.636187219475051, "learning_rate": 7.346747223298889e-07, "loss": 0.5684597492218018, "step": 5015 }, { "epoch": 1.7675770925110132, "grad_norm": 1.872749765270047, "learning_rate": 7.324843195215548e-07, "loss": 0.5614477396011353, "step": 5016 }, { "epoch": 1.7679295154185022, "grad_norm": 1.9944667195924293, "learning_rate": 7.302970627463779e-07, "loss": 0.508664608001709, "step": 5017 }, { "epoch": 1.7682819383259911, "grad_norm": 1.9918093815103546, "learning_rate": 7.281129527468645e-07, "loss": 0.5348209142684937, "step": 5018 }, { "epoch": 1.76863436123348, "grad_norm": 2.2774118234615695, "learning_rate": 7.259319902644513e-07, "loss": 0.6441121101379395, "step": 5019 }, { "epoch": 1.7689867841409692, "grad_norm": 1.7776640162425583, "learning_rate": 7.237541760395083e-07, "loss": 0.6454842686653137, "step": 5020 }, { "epoch": 1.7693392070484582, "grad_norm": 1.818033997112941, "learning_rate": 7.215795108113343e-07, "loss": 0.4822286367416382, "step": 5021 }, { "epoch": 1.769691629955947, "grad_norm": 2.2519074742911775, "learning_rate": 7.19407995318162e-07, "loss": 0.6078327894210815, "step": 5022 }, { "epoch": 1.7700440528634362, "grad_norm": 1.9964867958416748, "learning_rate": 7.172396302971507e-07, "loss": 0.6394459009170532, "step": 5023 }, { "epoch": 1.7703964757709252, "grad_norm": 1.919321953608054, "learning_rate": 7.150744164843959e-07, "loss": 0.646416425704956, "step": 5024 }, { "epoch": 1.7707488986784141, "grad_norm": 1.743918601710363, "learning_rate": 7.129123546149208e-07, "loss": 0.6265356540679932, "step": 5025 }, { "epoch": 1.771101321585903, "grad_norm": 1.717725969603381, "learning_rate": 7.107534454226728e-07, "loss": 0.5074717998504639, "step": 5026 }, { "epoch": 1.771453744493392, "grad_norm": 1.9181838757933405, "learning_rate": 7.0859768964054e-07, "loss": 0.7036402821540833, "step": 5027 }, { "epoch": 1.771806167400881, "grad_norm": 1.7638856276686163, "learning_rate": 7.064450880003327e-07, "loss": 0.6098893880844116, "step": 5028 }, { "epoch": 1.77215859030837, "grad_norm": 2.005026773406909, "learning_rate": 7.042956412327917e-07, "loss": 0.582880973815918, "step": 5029 }, { "epoch": 1.772511013215859, "grad_norm": 2.013313109536588, "learning_rate": 7.021493500675869e-07, "loss": 0.6003242135047913, "step": 5030 }, { "epoch": 1.7728634361233482, "grad_norm": 1.9319887994625418, "learning_rate": 7.000062152333165e-07, "loss": 0.4999944865703583, "step": 5031 }, { "epoch": 1.7732158590308371, "grad_norm": 1.8450299102376384, "learning_rate": 6.978662374575107e-07, "loss": 0.5569149255752563, "step": 5032 }, { "epoch": 1.773568281938326, "grad_norm": 1.9277460192299252, "learning_rate": 6.957294174666263e-07, "loss": 0.5600287914276123, "step": 5033 }, { "epoch": 1.773920704845815, "grad_norm": 1.8890013971887576, "learning_rate": 6.935957559860418e-07, "loss": 0.5412951707839966, "step": 5034 }, { "epoch": 1.774273127753304, "grad_norm": 1.7378105888388657, "learning_rate": 6.914652537400735e-07, "loss": 0.5881151556968689, "step": 5035 }, { "epoch": 1.7746255506607929, "grad_norm": 1.8829243382985155, "learning_rate": 6.893379114519572e-07, "loss": 0.5975406169891357, "step": 5036 }, { "epoch": 1.7749779735682818, "grad_norm": 1.7883517993987919, "learning_rate": 6.872137298438653e-07, "loss": 0.6266802549362183, "step": 5037 }, { "epoch": 1.775330396475771, "grad_norm": 2.279148556628154, "learning_rate": 6.850927096368854e-07, "loss": 0.6825709939002991, "step": 5038 }, { "epoch": 1.77568281938326, "grad_norm": 1.6068572613194736, "learning_rate": 6.829748515510381e-07, "loss": 0.6035742163658142, "step": 5039 }, { "epoch": 1.776035242290749, "grad_norm": 1.901514453732062, "learning_rate": 6.808601563052742e-07, "loss": 0.6665611267089844, "step": 5040 }, { "epoch": 1.776387665198238, "grad_norm": 2.334324554300087, "learning_rate": 6.787486246174657e-07, "loss": 0.8202367424964905, "step": 5041 }, { "epoch": 1.776740088105727, "grad_norm": 1.8080635950130315, "learning_rate": 6.766402572044084e-07, "loss": 0.6516656875610352, "step": 5042 }, { "epoch": 1.7770925110132159, "grad_norm": 1.6361942373114873, "learning_rate": 6.745350547818307e-07, "loss": 0.663591742515564, "step": 5043 }, { "epoch": 1.7774449339207048, "grad_norm": 2.0460511379273716, "learning_rate": 6.724330180643824e-07, "loss": 0.6025142669677734, "step": 5044 }, { "epoch": 1.7777973568281937, "grad_norm": 1.6332878492082579, "learning_rate": 6.703341477656422e-07, "loss": 0.5704027414321899, "step": 5045 }, { "epoch": 1.7781497797356827, "grad_norm": 2.0053343984683534, "learning_rate": 6.682384445981071e-07, "loss": 0.6518473625183105, "step": 5046 }, { "epoch": 1.7785022026431718, "grad_norm": 1.6878153153712165, "learning_rate": 6.661459092732037e-07, "loss": 0.5547574758529663, "step": 5047 }, { "epoch": 1.7788546255506608, "grad_norm": 1.8096814000573205, "learning_rate": 6.640565425012846e-07, "loss": 0.6248831748962402, "step": 5048 }, { "epoch": 1.77920704845815, "grad_norm": 1.8747085080187502, "learning_rate": 6.619703449916259e-07, "loss": 0.5899701118469238, "step": 5049 }, { "epoch": 1.7795594713656389, "grad_norm": 1.9253293216058311, "learning_rate": 6.598873174524223e-07, "loss": 0.41864174604415894, "step": 5050 }, { "epoch": 1.7799118942731278, "grad_norm": 2.2457701854009025, "learning_rate": 6.578074605908002e-07, "loss": 0.7473436594009399, "step": 5051 }, { "epoch": 1.7802643171806167, "grad_norm": 1.6599111795216646, "learning_rate": 6.557307751128051e-07, "loss": 0.49480879306793213, "step": 5052 }, { "epoch": 1.7806167400881057, "grad_norm": 1.8257078701065834, "learning_rate": 6.536572617234082e-07, "loss": 0.5619323253631592, "step": 5053 }, { "epoch": 1.7809691629955946, "grad_norm": 1.8566139978409217, "learning_rate": 6.515869211265013e-07, "loss": 0.5271984338760376, "step": 5054 }, { "epoch": 1.7813215859030835, "grad_norm": 1.967436768949709, "learning_rate": 6.495197540248999e-07, "loss": 0.6544383764266968, "step": 5055 }, { "epoch": 1.7816740088105727, "grad_norm": 2.157946298106486, "learning_rate": 6.474557611203458e-07, "loss": 0.6525388956069946, "step": 5056 }, { "epoch": 1.7820264317180616, "grad_norm": 2.0314482863762735, "learning_rate": 6.453949431134987e-07, "loss": 0.5509910583496094, "step": 5057 }, { "epoch": 1.7823788546255508, "grad_norm": 1.6067790596532618, "learning_rate": 6.433373007039412e-07, "loss": 0.5030776262283325, "step": 5058 }, { "epoch": 1.7827312775330397, "grad_norm": 1.875686429811456, "learning_rate": 6.412828345901811e-07, "loss": 0.6743696331977844, "step": 5059 }, { "epoch": 1.7830837004405287, "grad_norm": 1.9399780429001139, "learning_rate": 6.392315454696452e-07, "loss": 0.5395437479019165, "step": 5060 }, { "epoch": 1.7834361233480176, "grad_norm": 1.7657846282567238, "learning_rate": 6.371834340386807e-07, "loss": 0.5773402452468872, "step": 5061 }, { "epoch": 1.7837885462555065, "grad_norm": 1.920136830142019, "learning_rate": 6.351385009925582e-07, "loss": 0.6014268398284912, "step": 5062 }, { "epoch": 1.7841409691629955, "grad_norm": 1.9465884411051106, "learning_rate": 6.33096747025469e-07, "loss": 0.5519139170646667, "step": 5063 }, { "epoch": 1.7844933920704846, "grad_norm": 3.0085962631929752, "learning_rate": 6.310581728305254e-07, "loss": 0.5407502055168152, "step": 5064 }, { "epoch": 1.7848458149779736, "grad_norm": 1.5371833099084395, "learning_rate": 6.290227790997605e-07, "loss": 0.61688232421875, "step": 5065 }, { "epoch": 1.7851982378854625, "grad_norm": 2.002396471657761, "learning_rate": 6.269905665241271e-07, "loss": 0.5212849974632263, "step": 5066 }, { "epoch": 1.7855506607929517, "grad_norm": 1.7684490871986807, "learning_rate": 6.249615357934968e-07, "loss": 0.6827710866928101, "step": 5067 }, { "epoch": 1.7859030837004406, "grad_norm": 2.016669351586175, "learning_rate": 6.22935687596663e-07, "loss": 0.6907633543014526, "step": 5068 }, { "epoch": 1.7862555066079295, "grad_norm": 2.045834595721204, "learning_rate": 6.209130226213378e-07, "loss": 0.5707769989967346, "step": 5069 }, { "epoch": 1.7866079295154185, "grad_norm": 1.9432188628486171, "learning_rate": 6.188935415541541e-07, "loss": 0.6062690019607544, "step": 5070 }, { "epoch": 1.7869603524229074, "grad_norm": 1.8744219034756735, "learning_rate": 6.168772450806604e-07, "loss": 0.5291163921356201, "step": 5071 }, { "epoch": 1.7873127753303963, "grad_norm": 1.8892054954511246, "learning_rate": 6.148641338853301e-07, "loss": 0.6324198246002197, "step": 5072 }, { "epoch": 1.7876651982378855, "grad_norm": 1.7030219876612867, "learning_rate": 6.128542086515499e-07, "loss": 0.5516111850738525, "step": 5073 }, { "epoch": 1.7880176211453744, "grad_norm": 2.1800478368143232, "learning_rate": 6.108474700616263e-07, "loss": 0.6384079456329346, "step": 5074 }, { "epoch": 1.7883700440528636, "grad_norm": 1.777234944410244, "learning_rate": 6.088439187967865e-07, "loss": 0.5699876546859741, "step": 5075 }, { "epoch": 1.7887224669603525, "grad_norm": 2.081274535023766, "learning_rate": 6.06843555537171e-07, "loss": 0.6068697571754456, "step": 5076 }, { "epoch": 1.7890748898678415, "grad_norm": 2.1233392160842066, "learning_rate": 6.048463809618444e-07, "loss": 0.6254304647445679, "step": 5077 }, { "epoch": 1.7894273127753304, "grad_norm": 2.0059926594667914, "learning_rate": 6.02852395748782e-07, "loss": 0.6779477596282959, "step": 5078 }, { "epoch": 1.7897797356828193, "grad_norm": 1.8024145072939486, "learning_rate": 6.008616005748802e-07, "loss": 0.6139817833900452, "step": 5079 }, { "epoch": 1.7901321585903083, "grad_norm": 2.042935872875493, "learning_rate": 5.988739961159539e-07, "loss": 0.553310215473175, "step": 5080 }, { "epoch": 1.7904845814977972, "grad_norm": 1.9543566497010472, "learning_rate": 5.968895830467325e-07, "loss": 0.6093542575836182, "step": 5081 }, { "epoch": 1.7908370044052864, "grad_norm": 1.8231021161772492, "learning_rate": 5.949083620408614e-07, "loss": 0.6224432587623596, "step": 5082 }, { "epoch": 1.7911894273127753, "grad_norm": 1.881995664144807, "learning_rate": 5.929303337709047e-07, "loss": 0.6155597567558289, "step": 5083 }, { "epoch": 1.7915418502202645, "grad_norm": 1.7127795559170356, "learning_rate": 5.909554989083411e-07, "loss": 0.5742098093032837, "step": 5084 }, { "epoch": 1.7918942731277534, "grad_norm": 2.1579790645115886, "learning_rate": 5.889838581235641e-07, "loss": 0.7427949905395508, "step": 5085 }, { "epoch": 1.7922466960352423, "grad_norm": 1.8686834683482023, "learning_rate": 5.870154120858851e-07, "loss": 0.48208528757095337, "step": 5086 }, { "epoch": 1.7925991189427313, "grad_norm": 2.103622298674757, "learning_rate": 5.850501614635318e-07, "loss": 0.48402148485183716, "step": 5087 }, { "epoch": 1.7929515418502202, "grad_norm": 1.9085757415865392, "learning_rate": 5.83088106923646e-07, "loss": 0.6808921694755554, "step": 5088 }, { "epoch": 1.7933039647577091, "grad_norm": 1.4851842618773352, "learning_rate": 5.811292491322795e-07, "loss": 0.48358428478240967, "step": 5089 }, { "epoch": 1.793656387665198, "grad_norm": 1.801328000774117, "learning_rate": 5.791735887544081e-07, "loss": 0.6492827534675598, "step": 5090 }, { "epoch": 1.7940088105726872, "grad_norm": 1.542873674028149, "learning_rate": 5.772211264539162e-07, "loss": 0.5453791618347168, "step": 5091 }, { "epoch": 1.7943612334801762, "grad_norm": 1.780642500081645, "learning_rate": 5.75271862893605e-07, "loss": 0.5901151895523071, "step": 5092 }, { "epoch": 1.7947136563876653, "grad_norm": 2.0888993209852664, "learning_rate": 5.73325798735187e-07, "loss": 0.616302490234375, "step": 5093 }, { "epoch": 1.7950660792951543, "grad_norm": 1.7666548150635142, "learning_rate": 5.713829346392907e-07, "loss": 0.616886556148529, "step": 5094 }, { "epoch": 1.7954185022026432, "grad_norm": 2.1253066780397725, "learning_rate": 5.694432712654597e-07, "loss": 0.5552375316619873, "step": 5095 }, { "epoch": 1.7957709251101321, "grad_norm": 1.9305053090727797, "learning_rate": 5.675068092721491e-07, "loss": 0.5956143736839294, "step": 5096 }, { "epoch": 1.796123348017621, "grad_norm": 2.0198097994194675, "learning_rate": 5.655735493167247e-07, "loss": 0.5870288610458374, "step": 5097 }, { "epoch": 1.79647577092511, "grad_norm": 1.737470684820577, "learning_rate": 5.636434920554701e-07, "loss": 0.5325669646263123, "step": 5098 }, { "epoch": 1.7968281938325992, "grad_norm": 1.9881595702868853, "learning_rate": 5.617166381435813e-07, "loss": 0.5931425094604492, "step": 5099 }, { "epoch": 1.797180616740088, "grad_norm": 1.9607916445612916, "learning_rate": 5.597929882351627e-07, "loss": 0.5755603313446045, "step": 5100 }, { "epoch": 1.797533039647577, "grad_norm": 2.000480246693455, "learning_rate": 5.578725429832344e-07, "loss": 0.5780980587005615, "step": 5101 }, { "epoch": 1.7978854625550662, "grad_norm": 1.9982279321373282, "learning_rate": 5.559553030397258e-07, "loss": 0.5863890647888184, "step": 5102 }, { "epoch": 1.7982378854625551, "grad_norm": 1.8196971349794717, "learning_rate": 5.540412690554842e-07, "loss": 0.5577390789985657, "step": 5103 }, { "epoch": 1.798590308370044, "grad_norm": 1.773628551628446, "learning_rate": 5.521304416802642e-07, "loss": 0.5994857549667358, "step": 5104 }, { "epoch": 1.798942731277533, "grad_norm": 1.8364843823531443, "learning_rate": 5.502228215627281e-07, "loss": 0.6065348982810974, "step": 5105 }, { "epoch": 1.799295154185022, "grad_norm": 1.9447341697044171, "learning_rate": 5.483184093504568e-07, "loss": 0.5390498638153076, "step": 5106 }, { "epoch": 1.7996475770925109, "grad_norm": 1.9731136151561257, "learning_rate": 5.464172056899364e-07, "loss": 0.5826783180236816, "step": 5107 }, { "epoch": 1.8, "grad_norm": 1.7733740837200977, "learning_rate": 5.445192112265718e-07, "loss": 0.5429874658584595, "step": 5108 }, { "epoch": 1.800352422907489, "grad_norm": 1.8521585290179927, "learning_rate": 5.426244266046676e-07, "loss": 0.5591466426849365, "step": 5109 }, { "epoch": 1.8007048458149781, "grad_norm": 1.6996794293630604, "learning_rate": 5.407328524674449e-07, "loss": 0.5351911187171936, "step": 5110 }, { "epoch": 1.801057268722467, "grad_norm": 1.9525068150093072, "learning_rate": 5.388444894570378e-07, "loss": 0.6095720529556274, "step": 5111 }, { "epoch": 1.801409691629956, "grad_norm": 1.9048124225268466, "learning_rate": 5.369593382144844e-07, "loss": 0.6278849840164185, "step": 5112 }, { "epoch": 1.801762114537445, "grad_norm": 1.932605893192458, "learning_rate": 5.350773993797332e-07, "loss": 0.6787056922912598, "step": 5113 }, { "epoch": 1.8021145374449339, "grad_norm": 1.7901749162387552, "learning_rate": 5.331986735916461e-07, "loss": 0.6054684519767761, "step": 5114 }, { "epoch": 1.8024669603524228, "grad_norm": 1.9918768270140568, "learning_rate": 5.31323161487991e-07, "loss": 0.5039973855018616, "step": 5115 }, { "epoch": 1.8028193832599118, "grad_norm": 2.1203502988203207, "learning_rate": 5.294508637054474e-07, "loss": 0.6306504011154175, "step": 5116 }, { "epoch": 1.803171806167401, "grad_norm": 1.5433818431075417, "learning_rate": 5.275817808796013e-07, "loss": 0.5654761791229248, "step": 5117 }, { "epoch": 1.8035242290748899, "grad_norm": 1.84553610812893, "learning_rate": 5.257159136449452e-07, "loss": 0.5801905989646912, "step": 5118 }, { "epoch": 1.803876651982379, "grad_norm": 1.9190330109285871, "learning_rate": 5.238532626348891e-07, "loss": 0.6565619707107544, "step": 5119 }, { "epoch": 1.804229074889868, "grad_norm": 2.043183915925982, "learning_rate": 5.219938284817416e-07, "loss": 0.5923253297805786, "step": 5120 }, { "epoch": 1.8045814977973569, "grad_norm": 2.0522176560055647, "learning_rate": 5.2013761181672e-07, "loss": 0.6697949171066284, "step": 5121 }, { "epoch": 1.8049339207048458, "grad_norm": 1.5694231089682613, "learning_rate": 5.182846132699571e-07, "loss": 0.5146230459213257, "step": 5122 }, { "epoch": 1.8052863436123348, "grad_norm": 1.8882278421308176, "learning_rate": 5.16434833470485e-07, "loss": 0.5928882360458374, "step": 5123 }, { "epoch": 1.8056387665198237, "grad_norm": 1.8209325836560148, "learning_rate": 5.145882730462481e-07, "loss": 0.6114771366119385, "step": 5124 }, { "epoch": 1.8059911894273126, "grad_norm": 2.0596769025893122, "learning_rate": 5.127449326240952e-07, "loss": 0.6624642014503479, "step": 5125 }, { "epoch": 1.8063436123348018, "grad_norm": 1.6177669824438379, "learning_rate": 5.109048128297822e-07, "loss": 0.6277980208396912, "step": 5126 }, { "epoch": 1.8066960352422907, "grad_norm": 1.8432956331440709, "learning_rate": 5.090679142879751e-07, "loss": 0.6470246911048889, "step": 5127 }, { "epoch": 1.8070484581497799, "grad_norm": 1.9361376318593135, "learning_rate": 5.072342376222438e-07, "loss": 0.6418337821960449, "step": 5128 }, { "epoch": 1.8074008810572688, "grad_norm": 1.7303831881097942, "learning_rate": 5.054037834550596e-07, "loss": 0.6013847589492798, "step": 5129 }, { "epoch": 1.8077533039647578, "grad_norm": 2.0870369514809086, "learning_rate": 5.035765524078095e-07, "loss": 0.5354605913162231, "step": 5130 }, { "epoch": 1.8081057268722467, "grad_norm": 1.7245482885328716, "learning_rate": 5.01752545100781e-07, "loss": 0.6017459034919739, "step": 5131 }, { "epoch": 1.8084581497797356, "grad_norm": 2.1853671040659335, "learning_rate": 4.999317621531663e-07, "loss": 0.5929696559906006, "step": 5132 }, { "epoch": 1.8088105726872246, "grad_norm": 2.1106102623060723, "learning_rate": 4.981142041830645e-07, "loss": 0.6444251537322998, "step": 5133 }, { "epoch": 1.8091629955947135, "grad_norm": 1.9231094224982612, "learning_rate": 4.962998718074807e-07, "loss": 0.5854116678237915, "step": 5134 }, { "epoch": 1.8095154185022027, "grad_norm": 1.674252446757184, "learning_rate": 4.944887656423248e-07, "loss": 0.5145394206047058, "step": 5135 }, { "epoch": 1.8098678414096916, "grad_norm": 1.9221197947181823, "learning_rate": 4.926808863024102e-07, "loss": 0.5733104348182678, "step": 5136 }, { "epoch": 1.8102202643171807, "grad_norm": 1.955048282910108, "learning_rate": 4.908762344014573e-07, "loss": 0.5925072431564331, "step": 5137 }, { "epoch": 1.8105726872246697, "grad_norm": 1.8754640994406597, "learning_rate": 4.890748105520859e-07, "loss": 0.5346912145614624, "step": 5138 }, { "epoch": 1.8109251101321586, "grad_norm": 1.636475505756285, "learning_rate": 4.87276615365827e-07, "loss": 0.6206755638122559, "step": 5139 }, { "epoch": 1.8112775330396476, "grad_norm": 2.0734228349073076, "learning_rate": 4.854816494531089e-07, "loss": 0.5998660326004028, "step": 5140 }, { "epoch": 1.8116299559471365, "grad_norm": 2.10222956499389, "learning_rate": 4.836899134232687e-07, "loss": 0.44545644521713257, "step": 5141 }, { "epoch": 1.8119823788546254, "grad_norm": 1.904050289597462, "learning_rate": 4.81901407884543e-07, "loss": 0.701204776763916, "step": 5142 }, { "epoch": 1.8123348017621146, "grad_norm": 1.8707530799436762, "learning_rate": 4.801161334440762e-07, "loss": 0.6103897094726562, "step": 5143 }, { "epoch": 1.8126872246696035, "grad_norm": 1.7727850982789193, "learning_rate": 4.783340907079126e-07, "loss": 0.5864719152450562, "step": 5144 }, { "epoch": 1.8130396475770925, "grad_norm": 1.436946543481978, "learning_rate": 4.7655528028099916e-07, "loss": 0.46949082612991333, "step": 5145 }, { "epoch": 1.8133920704845816, "grad_norm": 1.9729708472080463, "learning_rate": 4.7477970276718855e-07, "loss": 0.6371885538101196, "step": 5146 }, { "epoch": 1.8137444933920706, "grad_norm": 2.043577546107911, "learning_rate": 4.730073587692319e-07, "loss": 0.6819220781326294, "step": 5147 }, { "epoch": 1.8140969162995595, "grad_norm": 1.7501541102560871, "learning_rate": 4.712382488887868e-07, "loss": 0.5230735540390015, "step": 5148 }, { "epoch": 1.8144493392070484, "grad_norm": 1.6629154647812032, "learning_rate": 4.6947237372640954e-07, "loss": 0.5194997787475586, "step": 5149 }, { "epoch": 1.8148017621145374, "grad_norm": 2.6396803493511842, "learning_rate": 4.677097338815595e-07, "loss": 0.6025055050849915, "step": 5150 }, { "epoch": 1.8151541850220263, "grad_norm": 1.9158428969793393, "learning_rate": 4.6595032995260135e-07, "loss": 0.649467945098877, "step": 5151 }, { "epoch": 1.8155066079295155, "grad_norm": 1.8951471308172565, "learning_rate": 4.641941625367918e-07, "loss": 0.5216347575187683, "step": 5152 }, { "epoch": 1.8158590308370044, "grad_norm": 2.264572307408149, "learning_rate": 4.6244123223030177e-07, "loss": 0.5135647058486938, "step": 5153 }, { "epoch": 1.8162114537444936, "grad_norm": 1.8178771999892822, "learning_rate": 4.6069153962819193e-07, "loss": 0.5526058673858643, "step": 5154 }, { "epoch": 1.8165638766519825, "grad_norm": 2.050533288883353, "learning_rate": 4.589450853244315e-07, "loss": 0.5897486209869385, "step": 5155 }, { "epoch": 1.8169162995594714, "grad_norm": 1.8009014119109743, "learning_rate": 4.5720186991188517e-07, "loss": 0.5698407888412476, "step": 5156 }, { "epoch": 1.8172687224669604, "grad_norm": 1.7954864355128493, "learning_rate": 4.5546189398232075e-07, "loss": 0.579573392868042, "step": 5157 }, { "epoch": 1.8176211453744493, "grad_norm": 1.7473651992455344, "learning_rate": 4.5372515812640573e-07, "loss": 0.41852182149887085, "step": 5158 }, { "epoch": 1.8179735682819382, "grad_norm": 1.7056493552996725, "learning_rate": 4.519916629337107e-07, "loss": 0.6081204414367676, "step": 5159 }, { "epoch": 1.8183259911894272, "grad_norm": 2.046109798166009, "learning_rate": 4.502614089926982e-07, "loss": 0.5725652575492859, "step": 5160 }, { "epoch": 1.8186784140969163, "grad_norm": 1.7147916989755474, "learning_rate": 4.4853439689073965e-07, "loss": 0.5109303593635559, "step": 5161 }, { "epoch": 1.8190308370044053, "grad_norm": 1.8721629996812361, "learning_rate": 4.468106272141004e-07, "loss": 0.5647833347320557, "step": 5162 }, { "epoch": 1.8193832599118944, "grad_norm": 1.8784402680779348, "learning_rate": 4.450901005479469e-07, "loss": 0.6074738502502441, "step": 5163 }, { "epoch": 1.8197356828193834, "grad_norm": 1.9135972387212516, "learning_rate": 4.433728174763452e-07, "loss": 0.647289514541626, "step": 5164 }, { "epoch": 1.8200881057268723, "grad_norm": 2.08976454113542, "learning_rate": 4.416587785822568e-07, "loss": 0.5817590951919556, "step": 5165 }, { "epoch": 1.8204405286343612, "grad_norm": 2.105714289057314, "learning_rate": 4.399479844475485e-07, "loss": 0.6483672857284546, "step": 5166 }, { "epoch": 1.8207929515418502, "grad_norm": 1.9562649517319024, "learning_rate": 4.382404356529801e-07, "loss": 0.5439441204071045, "step": 5167 }, { "epoch": 1.821145374449339, "grad_norm": 1.8467126365486348, "learning_rate": 4.3653613277820804e-07, "loss": 0.5835710167884827, "step": 5168 }, { "epoch": 1.821497797356828, "grad_norm": 1.9450074521030982, "learning_rate": 4.3483507640179503e-07, "loss": 0.7024152874946594, "step": 5169 }, { "epoch": 1.8218502202643172, "grad_norm": 1.880332916659811, "learning_rate": 4.331372671011935e-07, "loss": 0.5223513841629028, "step": 5170 }, { "epoch": 1.8222026431718061, "grad_norm": 2.771814545513559, "learning_rate": 4.3144270545275814e-07, "loss": 0.5975688099861145, "step": 5171 }, { "epoch": 1.8225550660792953, "grad_norm": 1.5329834705964882, "learning_rate": 4.2975139203173977e-07, "loss": 0.5459109544754028, "step": 5172 }, { "epoch": 1.8229074889867842, "grad_norm": 1.8202354421886453, "learning_rate": 4.2806332741228586e-07, "loss": 0.6155862808227539, "step": 5173 }, { "epoch": 1.8232599118942732, "grad_norm": 2.2226946714753644, "learning_rate": 4.263785121674435e-07, "loss": 0.6505374908447266, "step": 5174 }, { "epoch": 1.823612334801762, "grad_norm": 1.9153455724722082, "learning_rate": 4.246969468691553e-07, "loss": 0.5243734121322632, "step": 5175 }, { "epoch": 1.823964757709251, "grad_norm": 1.8732488601912396, "learning_rate": 4.2301863208825676e-07, "loss": 0.6931817531585693, "step": 5176 }, { "epoch": 1.82431718061674, "grad_norm": 1.969859922329015, "learning_rate": 4.2134356839448665e-07, "loss": 0.5312765836715698, "step": 5177 }, { "epoch": 1.824669603524229, "grad_norm": 1.9404158745446412, "learning_rate": 4.1967175635647674e-07, "loss": 0.598992109298706, "step": 5178 }, { "epoch": 1.825022026431718, "grad_norm": 1.7631344780586065, "learning_rate": 4.1800319654175413e-07, "loss": 0.5844708681106567, "step": 5179 }, { "epoch": 1.825374449339207, "grad_norm": 1.9995354508958225, "learning_rate": 4.1633788951674357e-07, "loss": 0.5884612798690796, "step": 5180 }, { "epoch": 1.8257268722466962, "grad_norm": 1.72810410086028, "learning_rate": 4.1467583584676395e-07, "loss": 0.6038404107093811, "step": 5181 }, { "epoch": 1.826079295154185, "grad_norm": 2.339259211755874, "learning_rate": 4.130170360960317e-07, "loss": 0.6511296033859253, "step": 5182 }, { "epoch": 1.826431718061674, "grad_norm": 1.925197944351106, "learning_rate": 4.113614908276609e-07, "loss": 0.5884404182434082, "step": 5183 }, { "epoch": 1.826784140969163, "grad_norm": 1.731239361884253, "learning_rate": 4.097092006036507e-07, "loss": 0.5549901723861694, "step": 5184 }, { "epoch": 1.827136563876652, "grad_norm": 1.994782951411243, "learning_rate": 4.0806016598490707e-07, "loss": 0.561951756477356, "step": 5185 }, { "epoch": 1.8274889867841408, "grad_norm": 1.869408348764558, "learning_rate": 4.064143875312254e-07, "loss": 0.6412413120269775, "step": 5186 }, { "epoch": 1.82784140969163, "grad_norm": 1.6798143654231001, "learning_rate": 4.0477186580129447e-07, "loss": 0.6295674443244934, "step": 5187 }, { "epoch": 1.828193832599119, "grad_norm": 1.6293958799120483, "learning_rate": 4.031326013527015e-07, "loss": 0.6700723767280579, "step": 5188 }, { "epoch": 1.8285462555066079, "grad_norm": 1.8215522719850648, "learning_rate": 4.014965947419236e-07, "loss": 0.5758254528045654, "step": 5189 }, { "epoch": 1.828898678414097, "grad_norm": 1.9932829475641192, "learning_rate": 3.9986384652433654e-07, "loss": 0.6663509607315063, "step": 5190 }, { "epoch": 1.829251101321586, "grad_norm": 1.9935453293677252, "learning_rate": 3.982343572542069e-07, "loss": 0.6459337472915649, "step": 5191 }, { "epoch": 1.829603524229075, "grad_norm": 1.854876606446137, "learning_rate": 3.9660812748469336e-07, "loss": 0.6411766409873962, "step": 5192 }, { "epoch": 1.8299559471365638, "grad_norm": 2.1651745240120976, "learning_rate": 3.9498515776785207e-07, "loss": 0.711888313293457, "step": 5193 }, { "epoch": 1.8303083700440528, "grad_norm": 2.2389356684810284, "learning_rate": 3.933654486546312e-07, "loss": 0.63288813829422, "step": 5194 }, { "epoch": 1.8306607929515417, "grad_norm": 1.9048245223498055, "learning_rate": 3.9174900069486985e-07, "loss": 0.6330822706222534, "step": 5195 }, { "epoch": 1.8310132158590309, "grad_norm": 2.0831179708663154, "learning_rate": 3.901358144373035e-07, "loss": 0.7242149114608765, "step": 5196 }, { "epoch": 1.8313656387665198, "grad_norm": 1.8790323108631095, "learning_rate": 3.885258904295575e-07, "loss": 0.6741703748703003, "step": 5197 }, { "epoch": 1.831718061674009, "grad_norm": 1.9200909143991698, "learning_rate": 3.8691922921815226e-07, "loss": 0.625057578086853, "step": 5198 }, { "epoch": 1.832070484581498, "grad_norm": 2.457846968244059, "learning_rate": 3.853158313484995e-07, "loss": 0.673669159412384, "step": 5199 }, { "epoch": 1.8324229074889868, "grad_norm": 1.7310768756301407, "learning_rate": 3.837156973648992e-07, "loss": 0.5981203317642212, "step": 5200 }, { "epoch": 1.8327753303964758, "grad_norm": 2.2560941225086992, "learning_rate": 3.821188278105514e-07, "loss": 0.6577199697494507, "step": 5201 }, { "epoch": 1.8331277533039647, "grad_norm": 1.8570769012933126, "learning_rate": 3.805252232275414e-07, "loss": 0.6951043605804443, "step": 5202 }, { "epoch": 1.8334801762114536, "grad_norm": 1.874325920944958, "learning_rate": 3.7893488415684964e-07, "loss": 0.572435200214386, "step": 5203 }, { "epoch": 1.8338325991189426, "grad_norm": 1.7906206085216059, "learning_rate": 3.773478111383455e-07, "loss": 0.5849496126174927, "step": 5204 }, { "epoch": 1.8341850220264317, "grad_norm": 1.9908368337543014, "learning_rate": 3.7576400471079023e-07, "loss": 0.5380967855453491, "step": 5205 }, { "epoch": 1.8345374449339207, "grad_norm": 1.7322293442190257, "learning_rate": 3.7418346541183923e-07, "loss": 0.5681222677230835, "step": 5206 }, { "epoch": 1.8348898678414098, "grad_norm": 1.7551676131968534, "learning_rate": 3.7260619377803677e-07, "loss": 0.5012099146842957, "step": 5207 }, { "epoch": 1.8352422907488988, "grad_norm": 1.9889231090545432, "learning_rate": 3.710321903448133e-07, "loss": 0.6175205707550049, "step": 5208 }, { "epoch": 1.8355947136563877, "grad_norm": 2.0658320822662137, "learning_rate": 3.6946145564649817e-07, "loss": 0.6190954446792603, "step": 5209 }, { "epoch": 1.8359471365638766, "grad_norm": 2.067936609981899, "learning_rate": 3.678939902163048e-07, "loss": 0.6820691823959351, "step": 5210 }, { "epoch": 1.8362995594713656, "grad_norm": 1.6116358163190896, "learning_rate": 3.6632979458633867e-07, "loss": 0.5309683084487915, "step": 5211 }, { "epoch": 1.8366519823788545, "grad_norm": 1.7416007879814253, "learning_rate": 3.6476886928759726e-07, "loss": 0.5110820531845093, "step": 5212 }, { "epoch": 1.8370044052863435, "grad_norm": 1.723221372899004, "learning_rate": 3.6321121484996447e-07, "loss": 0.6226333975791931, "step": 5213 }, { "epoch": 1.8373568281938326, "grad_norm": 2.234178040191492, "learning_rate": 3.6165683180221735e-07, "loss": 0.6287777423858643, "step": 5214 }, { "epoch": 1.8377092511013216, "grad_norm": 1.9295755553308827, "learning_rate": 3.601057206720182e-07, "loss": 0.7033661603927612, "step": 5215 }, { "epoch": 1.8380616740088107, "grad_norm": 2.3805238150126473, "learning_rate": 3.5855788198592257e-07, "loss": 0.5841168165206909, "step": 5216 }, { "epoch": 1.8384140969162996, "grad_norm": 1.9475866760038651, "learning_rate": 3.570133162693734e-07, "loss": 0.6797176599502563, "step": 5217 }, { "epoch": 1.8387665198237886, "grad_norm": 1.8282916435885754, "learning_rate": 3.5547202404670246e-07, "loss": 0.4317880868911743, "step": 5218 }, { "epoch": 1.8391189427312775, "grad_norm": 1.8334146730463823, "learning_rate": 3.5393400584113004e-07, "loss": 0.4757443368434906, "step": 5219 }, { "epoch": 1.8394713656387665, "grad_norm": 1.907804753373484, "learning_rate": 3.5239926217476627e-07, "loss": 0.6341856718063354, "step": 5220 }, { "epoch": 1.8398237885462554, "grad_norm": 1.8320811149781473, "learning_rate": 3.5086779356860777e-07, "loss": 0.5401504039764404, "step": 5221 }, { "epoch": 1.8401762114537445, "grad_norm": 1.9485378653698677, "learning_rate": 3.4933960054254314e-07, "loss": 0.507185697555542, "step": 5222 }, { "epoch": 1.8405286343612335, "grad_norm": 1.8475072625751607, "learning_rate": 3.478146836153418e-07, "loss": 0.544599175453186, "step": 5223 }, { "epoch": 1.8408810572687224, "grad_norm": 1.7516560167770228, "learning_rate": 3.4629304330466964e-07, "loss": 0.5231183767318726, "step": 5224 }, { "epoch": 1.8412334801762116, "grad_norm": 1.9594972590005177, "learning_rate": 3.447746801270746e-07, "loss": 0.5505118370056152, "step": 5225 }, { "epoch": 1.8415859030837005, "grad_norm": 1.8779318369867126, "learning_rate": 3.432595945979944e-07, "loss": 0.6056097149848938, "step": 5226 }, { "epoch": 1.8419383259911895, "grad_norm": 2.1828814894071806, "learning_rate": 3.4174778723175204e-07, "loss": 0.6292518377304077, "step": 5227 }, { "epoch": 1.8422907488986784, "grad_norm": 2.121254282924953, "learning_rate": 3.4023925854156035e-07, "loss": 0.6821235418319702, "step": 5228 }, { "epoch": 1.8426431718061673, "grad_norm": 1.8646887822875091, "learning_rate": 3.3873400903951636e-07, "loss": 0.6663388013839722, "step": 5229 }, { "epoch": 1.8429955947136563, "grad_norm": 1.7699721471254064, "learning_rate": 3.3723203923660795e-07, "loss": 0.5283368825912476, "step": 5230 }, { "epoch": 1.8433480176211454, "grad_norm": 1.8757843861417383, "learning_rate": 3.35733349642704e-07, "loss": 0.6193508505821228, "step": 5231 }, { "epoch": 1.8437004405286344, "grad_norm": 1.8277200643148488, "learning_rate": 3.3423794076656635e-07, "loss": 0.5790667533874512, "step": 5232 }, { "epoch": 1.8440528634361235, "grad_norm": 1.8773326611638317, "learning_rate": 3.3274581311583786e-07, "loss": 0.5774649381637573, "step": 5233 }, { "epoch": 1.8444052863436124, "grad_norm": 1.8907427086265292, "learning_rate": 3.312569671970489e-07, "loss": 0.7818938493728638, "step": 5234 }, { "epoch": 1.8447577092511014, "grad_norm": 1.9327729742836703, "learning_rate": 3.297714035156174e-07, "loss": 0.7140024900436401, "step": 5235 }, { "epoch": 1.8451101321585903, "grad_norm": 1.8813227413168874, "learning_rate": 3.2828912257584664e-07, "loss": 0.526549220085144, "step": 5236 }, { "epoch": 1.8454625550660793, "grad_norm": 1.7801884231788352, "learning_rate": 3.268101248809219e-07, "loss": 0.5497986078262329, "step": 5237 }, { "epoch": 1.8458149779735682, "grad_norm": 1.8669723447216968, "learning_rate": 3.2533441093292153e-07, "loss": 0.587260901927948, "step": 5238 }, { "epoch": 1.8461674008810571, "grad_norm": 1.7543011465942289, "learning_rate": 3.238619812327992e-07, "loss": 0.6064329147338867, "step": 5239 }, { "epoch": 1.8465198237885463, "grad_norm": 1.6866654405083865, "learning_rate": 3.22392836280403e-07, "loss": 0.5427783727645874, "step": 5240 }, { "epoch": 1.8468722466960352, "grad_norm": 2.007154381007414, "learning_rate": 3.209269765744605e-07, "loss": 0.6315155029296875, "step": 5241 }, { "epoch": 1.8472246696035244, "grad_norm": 1.8683798567232428, "learning_rate": 3.194644026125848e-07, "loss": 0.47614991664886475, "step": 5242 }, { "epoch": 1.8475770925110133, "grad_norm": 1.7870378472192856, "learning_rate": 3.1800511489127553e-07, "loss": 0.4671345353126526, "step": 5243 }, { "epoch": 1.8479295154185023, "grad_norm": 2.1401583736619774, "learning_rate": 3.1654911390591404e-07, "loss": 0.5751510262489319, "step": 5244 }, { "epoch": 1.8482819383259912, "grad_norm": 1.8052174793154305, "learning_rate": 3.1509640015076946e-07, "loss": 0.41024816036224365, "step": 5245 }, { "epoch": 1.8486343612334801, "grad_norm": 1.731551636677765, "learning_rate": 3.136469741189918e-07, "loss": 0.5401195287704468, "step": 5246 }, { "epoch": 1.848986784140969, "grad_norm": 1.653370854405324, "learning_rate": 3.1220083630261413e-07, "loss": 0.526515007019043, "step": 5247 }, { "epoch": 1.849339207048458, "grad_norm": 1.8913718815401968, "learning_rate": 3.1075798719255813e-07, "loss": 0.5476140975952148, "step": 5248 }, { "epoch": 1.8496916299559472, "grad_norm": 1.8985078398075201, "learning_rate": 3.093184272786254e-07, "loss": 0.5542911291122437, "step": 5249 }, { "epoch": 1.850044052863436, "grad_norm": 1.880723497688654, "learning_rate": 3.078821570495005e-07, "loss": 0.5147569179534912, "step": 5250 }, { "epoch": 1.8503964757709253, "grad_norm": 1.982026450369604, "learning_rate": 3.0644917699275355e-07, "loss": 0.5774611830711365, "step": 5251 }, { "epoch": 1.8507488986784142, "grad_norm": 1.7200421440570042, "learning_rate": 3.0501948759483646e-07, "loss": 0.6516300439834595, "step": 5252 }, { "epoch": 1.8511013215859031, "grad_norm": 2.0195950340864495, "learning_rate": 3.0359308934108435e-07, "loss": 0.7598013877868652, "step": 5253 }, { "epoch": 1.851453744493392, "grad_norm": 2.0638022912417506, "learning_rate": 3.0216998271571653e-07, "loss": 0.5605336427688599, "step": 5254 }, { "epoch": 1.851806167400881, "grad_norm": 2.028778763216705, "learning_rate": 3.007501682018288e-07, "loss": 0.6549514532089233, "step": 5255 }, { "epoch": 1.85215859030837, "grad_norm": 2.059939172990393, "learning_rate": 2.993336462814089e-07, "loss": 0.5390901565551758, "step": 5256 }, { "epoch": 1.8525110132158589, "grad_norm": 1.812559235788011, "learning_rate": 2.979204174353201e-07, "loss": 0.5039275884628296, "step": 5257 }, { "epoch": 1.852863436123348, "grad_norm": 1.6793203683546194, "learning_rate": 2.9651048214330956e-07, "loss": 0.4715292453765869, "step": 5258 }, { "epoch": 1.853215859030837, "grad_norm": 1.5445048853459802, "learning_rate": 2.951038408840068e-07, "loss": 0.4593687653541565, "step": 5259 }, { "epoch": 1.8535682819383261, "grad_norm": 2.427211613937901, "learning_rate": 2.9370049413492084e-07, "loss": 0.8451346158981323, "step": 5260 }, { "epoch": 1.853920704845815, "grad_norm": 1.796887553027914, "learning_rate": 2.923004423724474e-07, "loss": 0.5567130446434021, "step": 5261 }, { "epoch": 1.854273127753304, "grad_norm": 1.6019285108338794, "learning_rate": 2.909036860718595e-07, "loss": 0.4740293622016907, "step": 5262 }, { "epoch": 1.854625550660793, "grad_norm": 1.566732286884799, "learning_rate": 2.895102257073101e-07, "loss": 0.5279378294944763, "step": 5263 }, { "epoch": 1.8549779735682819, "grad_norm": 2.0699049521167923, "learning_rate": 2.881200617518387e-07, "loss": 0.5977471470832825, "step": 5264 }, { "epoch": 1.8553303964757708, "grad_norm": 2.147594228172352, "learning_rate": 2.8673319467736104e-07, "loss": 0.5385996699333191, "step": 5265 }, { "epoch": 1.85568281938326, "grad_norm": 2.011382389323699, "learning_rate": 2.85349624954675e-07, "loss": 0.5702279806137085, "step": 5266 }, { "epoch": 1.856035242290749, "grad_norm": 1.875774247263156, "learning_rate": 2.839693530534604e-07, "loss": 0.584097146987915, "step": 5267 }, { "epoch": 1.8563876651982378, "grad_norm": 1.9561416110933127, "learning_rate": 2.825923794422758e-07, "loss": 0.6205782890319824, "step": 5268 }, { "epoch": 1.856740088105727, "grad_norm": 1.8766933117628495, "learning_rate": 2.8121870458856284e-07, "loss": 0.5626852512359619, "step": 5269 }, { "epoch": 1.857092511013216, "grad_norm": 1.826792073608219, "learning_rate": 2.798483289586396e-07, "loss": 0.6052513122558594, "step": 5270 }, { "epoch": 1.8574449339207049, "grad_norm": 2.051566447554152, "learning_rate": 2.7848125301770504e-07, "loss": 0.5074095726013184, "step": 5271 }, { "epoch": 1.8577973568281938, "grad_norm": 2.3608926664844705, "learning_rate": 2.7711747722984127e-07, "loss": 0.8006119728088379, "step": 5272 }, { "epoch": 1.8581497797356827, "grad_norm": 1.939365874771501, "learning_rate": 2.7575700205800694e-07, "loss": 0.6437188982963562, "step": 5273 }, { "epoch": 1.8585022026431717, "grad_norm": 2.070323156152843, "learning_rate": 2.743998279640403e-07, "loss": 0.6610177755355835, "step": 5274 }, { "epoch": 1.8588546255506608, "grad_norm": 2.242727394045801, "learning_rate": 2.7304595540865953e-07, "loss": 0.6041977405548096, "step": 5275 }, { "epoch": 1.8592070484581498, "grad_norm": 2.296252009493085, "learning_rate": 2.716953848514625e-07, "loss": 0.5684002041816711, "step": 5276 }, { "epoch": 1.859559471365639, "grad_norm": 2.108426771462305, "learning_rate": 2.703481167509281e-07, "loss": 0.7256498336791992, "step": 5277 }, { "epoch": 1.8599118942731279, "grad_norm": 1.959590007863519, "learning_rate": 2.690041515644093e-07, "loss": 0.7264266014099121, "step": 5278 }, { "epoch": 1.8602643171806168, "grad_norm": 2.0027244373685047, "learning_rate": 2.6766348974813895e-07, "loss": 0.5427879095077515, "step": 5279 }, { "epoch": 1.8606167400881057, "grad_norm": 1.679848534564951, "learning_rate": 2.663261317572341e-07, "loss": 0.5970745086669922, "step": 5280 }, { "epoch": 1.8609691629955947, "grad_norm": 1.9989999209106484, "learning_rate": 2.6499207804568495e-07, "loss": 0.5796299576759338, "step": 5281 }, { "epoch": 1.8613215859030836, "grad_norm": 1.6433355014728201, "learning_rate": 2.6366132906635923e-07, "loss": 0.4900246262550354, "step": 5282 }, { "epoch": 1.8616740088105725, "grad_norm": 1.8937189873731617, "learning_rate": 2.6233388527100777e-07, "loss": 0.6052582263946533, "step": 5283 }, { "epoch": 1.8620264317180617, "grad_norm": 2.1632344831004127, "learning_rate": 2.610097471102524e-07, "loss": 0.6908484697341919, "step": 5284 }, { "epoch": 1.8623788546255506, "grad_norm": 1.9493448159947622, "learning_rate": 2.596889150336024e-07, "loss": 0.6353795528411865, "step": 5285 }, { "epoch": 1.8627312775330398, "grad_norm": 2.019445353702499, "learning_rate": 2.5837138948943354e-07, "loss": 0.803575873374939, "step": 5286 }, { "epoch": 1.8630837004405287, "grad_norm": 1.9882041113358364, "learning_rate": 2.5705717092500694e-07, "loss": 0.5551957488059998, "step": 5287 }, { "epoch": 1.8634361233480177, "grad_norm": 1.9987103830633048, "learning_rate": 2.5574625978646017e-07, "loss": 0.6247879266738892, "step": 5288 }, { "epoch": 1.8637885462555066, "grad_norm": 2.072117287811421, "learning_rate": 2.544386565188062e-07, "loss": 0.6029977798461914, "step": 5289 }, { "epoch": 1.8641409691629955, "grad_norm": 2.101747258049668, "learning_rate": 2.531343615659343e-07, "loss": 0.611297070980072, "step": 5290 }, { "epoch": 1.8644933920704845, "grad_norm": 2.1168170865355616, "learning_rate": 2.518333753706137e-07, "loss": 0.5290260314941406, "step": 5291 }, { "epoch": 1.8648458149779734, "grad_norm": 1.88270236786552, "learning_rate": 2.5053569837448664e-07, "loss": 0.5988795757293701, "step": 5292 }, { "epoch": 1.8651982378854626, "grad_norm": 2.1933893236783613, "learning_rate": 2.4924133101807636e-07, "loss": 0.671028733253479, "step": 5293 }, { "epoch": 1.8655506607929515, "grad_norm": 2.195163128107634, "learning_rate": 2.4795027374077905e-07, "loss": 0.5741167664527893, "step": 5294 }, { "epoch": 1.8659030837004407, "grad_norm": 1.8793688638635475, "learning_rate": 2.4666252698086867e-07, "loss": 0.47447216510772705, "step": 5295 }, { "epoch": 1.8662555066079296, "grad_norm": 1.813537542020307, "learning_rate": 2.453780911754955e-07, "loss": 0.6535651087760925, "step": 5296 }, { "epoch": 1.8666079295154185, "grad_norm": 1.830958965071389, "learning_rate": 2.4409696676068517e-07, "loss": 0.5928847193717957, "step": 5297 }, { "epoch": 1.8669603524229075, "grad_norm": 2.1016696944101363, "learning_rate": 2.428191541713387e-07, "loss": 0.5928774476051331, "step": 5298 }, { "epoch": 1.8673127753303964, "grad_norm": 1.8181831294339377, "learning_rate": 2.415446538412358e-07, "loss": 0.5798670053482056, "step": 5299 }, { "epoch": 1.8676651982378853, "grad_norm": 1.8162014512536164, "learning_rate": 2.4027346620302707e-07, "loss": 0.6222843527793884, "step": 5300 }, { "epoch": 1.8680176211453743, "grad_norm": 1.9183032685045331, "learning_rate": 2.39005591688245e-07, "loss": 0.5501612424850464, "step": 5301 }, { "epoch": 1.8683700440528634, "grad_norm": 1.7621857286720093, "learning_rate": 2.377410307272887e-07, "loss": 0.5266422033309937, "step": 5302 }, { "epoch": 1.8687224669603524, "grad_norm": 1.9926692528436012, "learning_rate": 2.3647978374944037e-07, "loss": 0.7145729064941406, "step": 5303 }, { "epoch": 1.8690748898678415, "grad_norm": 1.8939089473542137, "learning_rate": 2.3522185118285411e-07, "loss": 0.6505781412124634, "step": 5304 }, { "epoch": 1.8694273127753305, "grad_norm": 2.0817226286854607, "learning_rate": 2.3396723345455728e-07, "loss": 0.6278528571128845, "step": 5305 }, { "epoch": 1.8697797356828194, "grad_norm": 1.790557343760165, "learning_rate": 2.3271593099045475e-07, "loss": 0.5650503039360046, "step": 5306 }, { "epoch": 1.8701321585903083, "grad_norm": 1.6157546701422072, "learning_rate": 2.314679442153256e-07, "loss": 0.6267939209938049, "step": 5307 }, { "epoch": 1.8704845814977973, "grad_norm": 1.874302486649101, "learning_rate": 2.302232735528187e-07, "loss": 0.45913875102996826, "step": 5308 }, { "epoch": 1.8708370044052862, "grad_norm": 1.7607480001908633, "learning_rate": 2.289819194254661e-07, "loss": 0.6122059226036072, "step": 5309 }, { "epoch": 1.8711894273127754, "grad_norm": 1.803806841150382, "learning_rate": 2.2774388225466514e-07, "loss": 0.6479405164718628, "step": 5310 }, { "epoch": 1.8715418502202643, "grad_norm": 1.8546829656575279, "learning_rate": 2.26509162460693e-07, "loss": 0.5013849139213562, "step": 5311 }, { "epoch": 1.8718942731277532, "grad_norm": 1.749663744266161, "learning_rate": 2.2527776046269767e-07, "loss": 0.6431373357772827, "step": 5312 }, { "epoch": 1.8722466960352424, "grad_norm": 1.669095711801791, "learning_rate": 2.2404967667870147e-07, "loss": 0.6447317004203796, "step": 5313 }, { "epoch": 1.8725991189427313, "grad_norm": 2.405218866271529, "learning_rate": 2.2282491152560203e-07, "loss": 0.5784682631492615, "step": 5314 }, { "epoch": 1.8729515418502203, "grad_norm": 1.7544004376252713, "learning_rate": 2.2160346541916677e-07, "loss": 0.560835599899292, "step": 5315 }, { "epoch": 1.8733039647577092, "grad_norm": 1.7162975954294335, "learning_rate": 2.2038533877404066e-07, "loss": 0.5930913686752319, "step": 5316 }, { "epoch": 1.8736563876651982, "grad_norm": 1.9892540663354406, "learning_rate": 2.1917053200374073e-07, "loss": 0.7221095561981201, "step": 5317 }, { "epoch": 1.874008810572687, "grad_norm": 1.9380281400359725, "learning_rate": 2.179590455206515e-07, "loss": 0.6307567358016968, "step": 5318 }, { "epoch": 1.8743612334801762, "grad_norm": 2.0190052317760814, "learning_rate": 2.167508797360396e-07, "loss": 0.6158597469329834, "step": 5319 }, { "epoch": 1.8747136563876652, "grad_norm": 1.7468326387459954, "learning_rate": 2.1554603506003802e-07, "loss": 0.5778557062149048, "step": 5320 }, { "epoch": 1.8750660792951543, "grad_norm": 1.497372593580549, "learning_rate": 2.1434451190165294e-07, "loss": 0.5213632583618164, "step": 5321 }, { "epoch": 1.8754185022026433, "grad_norm": 1.8555907678767487, "learning_rate": 2.131463106687659e-07, "loss": 0.6633203029632568, "step": 5322 }, { "epoch": 1.8757709251101322, "grad_norm": 1.9991798348617227, "learning_rate": 2.1195143176812817e-07, "loss": 0.6586780548095703, "step": 5323 }, { "epoch": 1.8761233480176212, "grad_norm": 1.991978810673319, "learning_rate": 2.1075987560536305e-07, "loss": 0.4946047067642212, "step": 5324 }, { "epoch": 1.87647577092511, "grad_norm": 1.6744690075916624, "learning_rate": 2.0957164258497031e-07, "loss": 0.5689302682876587, "step": 5325 }, { "epoch": 1.876828193832599, "grad_norm": 1.9550201402383367, "learning_rate": 2.0838673311031287e-07, "loss": 0.5761843323707581, "step": 5326 }, { "epoch": 1.877180616740088, "grad_norm": 1.6070623974889393, "learning_rate": 2.0720514758363343e-07, "loss": 0.5714447498321533, "step": 5327 }, { "epoch": 1.8775330396475771, "grad_norm": 1.7537019465709125, "learning_rate": 2.0602688640604441e-07, "loss": 0.4566301107406616, "step": 5328 }, { "epoch": 1.877885462555066, "grad_norm": 2.110089760102471, "learning_rate": 2.04851949977527e-07, "loss": 0.6326137781143188, "step": 5329 }, { "epoch": 1.8782378854625552, "grad_norm": 1.8775980517302555, "learning_rate": 2.036803386969355e-07, "loss": 0.6342206001281738, "step": 5330 }, { "epoch": 1.8785903083700441, "grad_norm": 1.9958405881870251, "learning_rate": 2.0251205296199616e-07, "loss": 0.5525872707366943, "step": 5331 }, { "epoch": 1.878942731277533, "grad_norm": 1.6965395036886874, "learning_rate": 2.0134709316930733e-07, "loss": 0.4932950735092163, "step": 5332 }, { "epoch": 1.879295154185022, "grad_norm": 1.7918605717870588, "learning_rate": 2.001854597143349e-07, "loss": 0.6526485681533813, "step": 5333 }, { "epoch": 1.879647577092511, "grad_norm": 1.8862781919579625, "learning_rate": 1.990271529914156e-07, "loss": 0.6256940960884094, "step": 5334 }, { "epoch": 1.88, "grad_norm": 2.361417623387243, "learning_rate": 1.9787217339376053e-07, "loss": 0.6406987905502319, "step": 5335 }, { "epoch": 1.8803524229074888, "grad_norm": 1.812802653812012, "learning_rate": 1.9672052131345043e-07, "loss": 0.6141321659088135, "step": 5336 }, { "epoch": 1.880704845814978, "grad_norm": 2.025004487176686, "learning_rate": 1.955721971414326e-07, "loss": 0.558428943157196, "step": 5337 }, { "epoch": 1.881057268722467, "grad_norm": 1.973943138705469, "learning_rate": 1.9442720126752968e-07, "loss": 0.5995065569877625, "step": 5338 }, { "epoch": 1.881409691629956, "grad_norm": 1.6822565518265986, "learning_rate": 1.932855340804296e-07, "loss": 0.5109822750091553, "step": 5339 }, { "epoch": 1.881762114537445, "grad_norm": 1.941646392245956, "learning_rate": 1.921471959676957e-07, "loss": 0.6695220470428467, "step": 5340 }, { "epoch": 1.882114537444934, "grad_norm": 1.8857636319654494, "learning_rate": 1.9101218731575777e-07, "loss": 0.6982283592224121, "step": 5341 }, { "epoch": 1.882466960352423, "grad_norm": 1.8944501787373655, "learning_rate": 1.8988050850991314e-07, "loss": 0.6475410461425781, "step": 5342 }, { "epoch": 1.8828193832599118, "grad_norm": 1.7449353446414906, "learning_rate": 1.8875215993433448e-07, "loss": 0.57706218957901, "step": 5343 }, { "epoch": 1.8831718061674008, "grad_norm": 1.708696671712054, "learning_rate": 1.8762714197205988e-07, "loss": 0.5243045091629028, "step": 5344 }, { "epoch": 1.88352422907489, "grad_norm": 1.797956034726921, "learning_rate": 1.865054550049994e-07, "loss": 0.6208887100219727, "step": 5345 }, { "epoch": 1.8838766519823789, "grad_norm": 1.9048581772706628, "learning_rate": 1.853870994139284e-07, "loss": 0.5572443008422852, "step": 5346 }, { "epoch": 1.8842290748898678, "grad_norm": 1.7939928987370566, "learning_rate": 1.8427207557849436e-07, "loss": 0.5673031806945801, "step": 5347 }, { "epoch": 1.884581497797357, "grad_norm": 1.6894216214789064, "learning_rate": 1.8316038387721558e-07, "loss": 0.5085422992706299, "step": 5348 }, { "epoch": 1.8849339207048459, "grad_norm": 1.7455381888238348, "learning_rate": 1.8205202468747463e-07, "loss": 0.5480824708938599, "step": 5349 }, { "epoch": 1.8852863436123348, "grad_norm": 1.7848642016680003, "learning_rate": 1.8094699838552387e-07, "loss": 0.6236293911933899, "step": 5350 }, { "epoch": 1.8856387665198238, "grad_norm": 1.7626474829765526, "learning_rate": 1.798453053464888e-07, "loss": 0.541741132736206, "step": 5351 }, { "epoch": 1.8859911894273127, "grad_norm": 1.7289887528200605, "learning_rate": 1.7874694594435692e-07, "loss": 0.5309538245201111, "step": 5352 }, { "epoch": 1.8863436123348016, "grad_norm": 1.944311199542912, "learning_rate": 1.7765192055198888e-07, "loss": 0.5886228084564209, "step": 5353 }, { "epoch": 1.8866960352422908, "grad_norm": 1.6415851491633797, "learning_rate": 1.7656022954111064e-07, "loss": 0.6216265559196472, "step": 5354 }, { "epoch": 1.8870484581497797, "grad_norm": 1.6922081510439257, "learning_rate": 1.7547187328231575e-07, "loss": 0.5393999814987183, "step": 5355 }, { "epoch": 1.8874008810572689, "grad_norm": 1.7167987260272457, "learning_rate": 1.74386852145072e-07, "loss": 0.583373486995697, "step": 5356 }, { "epoch": 1.8877533039647578, "grad_norm": 2.361225928566298, "learning_rate": 1.73305166497707e-07, "loss": 0.6403313875198364, "step": 5357 }, { "epoch": 1.8881057268722468, "grad_norm": 1.771396849548527, "learning_rate": 1.7222681670741814e-07, "loss": 0.5780963897705078, "step": 5358 }, { "epoch": 1.8884581497797357, "grad_norm": 1.59802053134679, "learning_rate": 1.711518031402748e-07, "loss": 0.6046397686004639, "step": 5359 }, { "epoch": 1.8888105726872246, "grad_norm": 1.5504259730519754, "learning_rate": 1.700801261612084e-07, "loss": 0.5582219362258911, "step": 5360 }, { "epoch": 1.8891629955947136, "grad_norm": 1.962329345083699, "learning_rate": 1.6901178613402125e-07, "loss": 0.4880410432815552, "step": 5361 }, { "epoch": 1.8895154185022025, "grad_norm": 2.055990524297856, "learning_rate": 1.6794678342138105e-07, "loss": 0.7417550086975098, "step": 5362 }, { "epoch": 1.8898678414096917, "grad_norm": 1.8316934396355506, "learning_rate": 1.668851183848219e-07, "loss": 0.4616948962211609, "step": 5363 }, { "epoch": 1.8902202643171806, "grad_norm": 1.6177478399502592, "learning_rate": 1.658267913847489e-07, "loss": 0.5595716834068298, "step": 5364 }, { "epoch": 1.8905726872246698, "grad_norm": 1.9610306002643032, "learning_rate": 1.6477180278042793e-07, "loss": 0.72450852394104, "step": 5365 }, { "epoch": 1.8909251101321587, "grad_norm": 1.8036541582694667, "learning_rate": 1.637201529299959e-07, "loss": 0.6261592507362366, "step": 5366 }, { "epoch": 1.8912775330396476, "grad_norm": 2.1024939179342823, "learning_rate": 1.6267184219045607e-07, "loss": 0.5023064613342285, "step": 5367 }, { "epoch": 1.8916299559471366, "grad_norm": 1.9210322300280602, "learning_rate": 1.6162687091767714e-07, "loss": 0.7113457918167114, "step": 5368 }, { "epoch": 1.8919823788546255, "grad_norm": 1.9212954550271457, "learning_rate": 1.6058523946639426e-07, "loss": 0.5376787185668945, "step": 5369 }, { "epoch": 1.8923348017621144, "grad_norm": 1.86817536856008, "learning_rate": 1.5954694819020788e-07, "loss": 0.6523979902267456, "step": 5370 }, { "epoch": 1.8926872246696034, "grad_norm": 1.841265437549123, "learning_rate": 1.5851199744158607e-07, "loss": 0.6610705852508545, "step": 5371 }, { "epoch": 1.8930396475770925, "grad_norm": 2.0967966308369053, "learning_rate": 1.5748038757186445e-07, "loss": 0.657126247882843, "step": 5372 }, { "epoch": 1.8933920704845815, "grad_norm": 2.3300722251609893, "learning_rate": 1.5645211893123846e-07, "loss": 0.7247096300125122, "step": 5373 }, { "epoch": 1.8937444933920706, "grad_norm": 1.5063549897958597, "learning_rate": 1.5542719186877553e-07, "loss": 0.5392117500305176, "step": 5374 }, { "epoch": 1.8940969162995596, "grad_norm": 1.706529406386883, "learning_rate": 1.5440560673240735e-07, "loss": 0.5038361549377441, "step": 5375 }, { "epoch": 1.8944493392070485, "grad_norm": 1.9403637299706042, "learning_rate": 1.5338736386892982e-07, "loss": 0.4768316447734833, "step": 5376 }, { "epoch": 1.8948017621145374, "grad_norm": 1.7917263966392405, "learning_rate": 1.5237246362400316e-07, "loss": 0.5925793051719666, "step": 5377 }, { "epoch": 1.8951541850220264, "grad_norm": 2.029166285154972, "learning_rate": 1.5136090634215616e-07, "loss": 0.47840988636016846, "step": 5378 }, { "epoch": 1.8955066079295153, "grad_norm": 1.9172034216887006, "learning_rate": 1.5035269236677974e-07, "loss": 0.6365169882774353, "step": 5379 }, { "epoch": 1.8958590308370042, "grad_norm": 1.789950493711397, "learning_rate": 1.4934782204013344e-07, "loss": 0.6287797689437866, "step": 5380 }, { "epoch": 1.8962114537444934, "grad_norm": 1.8420293657892082, "learning_rate": 1.4834629570333548e-07, "loss": 0.6859137415885925, "step": 5381 }, { "epoch": 1.8965638766519823, "grad_norm": 1.9365437650034845, "learning_rate": 1.4734811369637725e-07, "loss": 0.5545040369033813, "step": 5382 }, { "epoch": 1.8969162995594715, "grad_norm": 1.6857031681916985, "learning_rate": 1.463532763581077e-07, "loss": 0.6418923139572144, "step": 5383 }, { "epoch": 1.8972687224669604, "grad_norm": 4.115242480246632, "learning_rate": 1.4536178402624334e-07, "loss": 0.7618488669395447, "step": 5384 }, { "epoch": 1.8976211453744494, "grad_norm": 1.7790399709296727, "learning_rate": 1.4437363703736718e-07, "loss": 0.6178286671638489, "step": 5385 }, { "epoch": 1.8979735682819383, "grad_norm": 2.33955789440919, "learning_rate": 1.4338883572692087e-07, "loss": 0.6800570487976074, "step": 5386 }, { "epoch": 1.8983259911894272, "grad_norm": 1.9056441030293936, "learning_rate": 1.4240738042921588e-07, "loss": 0.6063584089279175, "step": 5387 }, { "epoch": 1.8986784140969162, "grad_norm": 1.857878498727731, "learning_rate": 1.4142927147742792e-07, "loss": 0.5631873607635498, "step": 5388 }, { "epoch": 1.8990308370044053, "grad_norm": 1.6999145603505723, "learning_rate": 1.4045450920358917e-07, "loss": 0.5346484184265137, "step": 5389 }, { "epoch": 1.8993832599118943, "grad_norm": 1.660876208730021, "learning_rate": 1.3948309393860605e-07, "loss": 0.5043535232543945, "step": 5390 }, { "epoch": 1.8997356828193832, "grad_norm": 1.9091498065078292, "learning_rate": 1.3851502601224032e-07, "loss": 0.6591805219650269, "step": 5391 }, { "epoch": 1.9000881057268724, "grad_norm": 1.777554153966534, "learning_rate": 1.3755030575312355e-07, "loss": 0.6831244826316833, "step": 5392 }, { "epoch": 1.9004405286343613, "grad_norm": 1.744983267268657, "learning_rate": 1.3658893348874714e-07, "loss": 0.6572617292404175, "step": 5393 }, { "epoch": 1.9007929515418502, "grad_norm": 2.007956379457216, "learning_rate": 1.3563090954546555e-07, "loss": 0.5834530591964722, "step": 5394 }, { "epoch": 1.9011453744493392, "grad_norm": 1.8405418946212868, "learning_rate": 1.3467623424850084e-07, "loss": 0.5810972452163696, "step": 5395 }, { "epoch": 1.9014977973568281, "grad_norm": 1.8342670520255937, "learning_rate": 1.3372490792193493e-07, "loss": 0.6338596940040588, "step": 5396 }, { "epoch": 1.901850220264317, "grad_norm": 2.4739742581402946, "learning_rate": 1.327769308887117e-07, "loss": 0.5274045467376709, "step": 5397 }, { "epoch": 1.9022026431718062, "grad_norm": 2.13415646905843, "learning_rate": 1.3183230347064147e-07, "loss": 0.5416278839111328, "step": 5398 }, { "epoch": 1.9025550660792951, "grad_norm": 1.8878260396672215, "learning_rate": 1.3089102598839442e-07, "loss": 0.4818935692310333, "step": 5399 }, { "epoch": 1.9029074889867843, "grad_norm": 1.6383283062285148, "learning_rate": 1.299530987615072e-07, "loss": 0.4553770124912262, "step": 5400 }, { "epoch": 1.9032599118942732, "grad_norm": 1.7060011862412936, "learning_rate": 1.2901852210837507e-07, "loss": 0.5663920640945435, "step": 5401 }, { "epoch": 1.9036123348017622, "grad_norm": 1.975611905778012, "learning_rate": 1.2808729634625872e-07, "loss": 0.5654638409614563, "step": 5402 }, { "epoch": 1.903964757709251, "grad_norm": 2.0012288604540136, "learning_rate": 1.271594217912797e-07, "loss": 0.8061939477920532, "step": 5403 }, { "epoch": 1.90431718061674, "grad_norm": 2.149695499003911, "learning_rate": 1.2623489875842276e-07, "loss": 0.5832188129425049, "step": 5404 }, { "epoch": 1.904669603524229, "grad_norm": 1.8966385092802618, "learning_rate": 1.2531372756153458e-07, "loss": 0.6112633943557739, "step": 5405 }, { "epoch": 1.905022026431718, "grad_norm": 2.3113031929819106, "learning_rate": 1.2439590851332394e-07, "loss": 0.7083494663238525, "step": 5406 }, { "epoch": 1.905374449339207, "grad_norm": 1.9110441437452201, "learning_rate": 1.2348144192536272e-07, "loss": 0.5319055318832397, "step": 5407 }, { "epoch": 1.905726872246696, "grad_norm": 1.9724655581165889, "learning_rate": 1.2257032810808256e-07, "loss": 0.6199945211410522, "step": 5408 }, { "epoch": 1.9060792951541852, "grad_norm": 2.3233890606574503, "learning_rate": 1.2166256737077942e-07, "loss": 0.6596004962921143, "step": 5409 }, { "epoch": 1.906431718061674, "grad_norm": 1.9040617554840082, "learning_rate": 1.20758160021609e-07, "loss": 0.553988516330719, "step": 5410 }, { "epoch": 1.906784140969163, "grad_norm": 2.329855084255152, "learning_rate": 1.1985710636759128e-07, "loss": 0.6295895576477051, "step": 5411 }, { "epoch": 1.907136563876652, "grad_norm": 2.035449496855298, "learning_rate": 1.1895940671460271e-07, "loss": 0.6555598378181458, "step": 5412 }, { "epoch": 1.907488986784141, "grad_norm": 1.8252966820746244, "learning_rate": 1.1806506136738616e-07, "loss": 0.48203831911087036, "step": 5413 }, { "epoch": 1.9078414096916299, "grad_norm": 2.0052153848511045, "learning_rate": 1.1717407062954434e-07, "loss": 0.6632858514785767, "step": 5414 }, { "epoch": 1.9081938325991188, "grad_norm": 1.913108464706502, "learning_rate": 1.1628643480354085e-07, "loss": 0.6058870553970337, "step": 5415 }, { "epoch": 1.908546255506608, "grad_norm": 1.6689328390033278, "learning_rate": 1.1540215419070022e-07, "loss": 0.5106638073921204, "step": 5416 }, { "epoch": 1.9088986784140969, "grad_norm": 1.965112171139023, "learning_rate": 1.1452122909120788e-07, "loss": 0.6641250848770142, "step": 5417 }, { "epoch": 1.909251101321586, "grad_norm": 1.7797017689691026, "learning_rate": 1.1364365980411019e-07, "loss": 0.4823518395423889, "step": 5418 }, { "epoch": 1.909603524229075, "grad_norm": 1.7374946519813605, "learning_rate": 1.127694466273166e-07, "loss": 0.5770869255065918, "step": 5419 }, { "epoch": 1.909955947136564, "grad_norm": 1.8439547121423094, "learning_rate": 1.1189858985759306e-07, "loss": 0.5120491981506348, "step": 5420 }, { "epoch": 1.9103083700440529, "grad_norm": 1.998054444662161, "learning_rate": 1.1103108979056865e-07, "loss": 0.6742277145385742, "step": 5421 }, { "epoch": 1.9106607929515418, "grad_norm": 1.7361045655014782, "learning_rate": 1.1016694672073336e-07, "loss": 0.6053510904312134, "step": 5422 }, { "epoch": 1.9110132158590307, "grad_norm": 2.276872906150792, "learning_rate": 1.0930616094143698e-07, "loss": 0.5598228573799133, "step": 5423 }, { "epoch": 1.9113656387665197, "grad_norm": 1.7689371613585823, "learning_rate": 1.0844873274488799e-07, "loss": 0.599521279335022, "step": 5424 }, { "epoch": 1.9117180616740088, "grad_norm": 2.270274631303626, "learning_rate": 1.075946624221591e-07, "loss": 0.5986596345901489, "step": 5425 }, { "epoch": 1.9120704845814978, "grad_norm": 2.0819173495219054, "learning_rate": 1.067439502631773e-07, "loss": 0.5657980442047119, "step": 5426 }, { "epoch": 1.912422907488987, "grad_norm": 2.498725021517388, "learning_rate": 1.0589659655673712e-07, "loss": 0.5561040639877319, "step": 5427 }, { "epoch": 1.9127753303964758, "grad_norm": 1.6241033411576455, "learning_rate": 1.0505260159048513e-07, "loss": 0.5088320970535278, "step": 5428 }, { "epoch": 1.9131277533039648, "grad_norm": 2.1207031706665407, "learning_rate": 1.0421196565093217e-07, "loss": 0.5679075717926025, "step": 5429 }, { "epoch": 1.9134801762114537, "grad_norm": 1.8775486377310404, "learning_rate": 1.0337468902344994e-07, "loss": 0.6701461672782898, "step": 5430 }, { "epoch": 1.9138325991189427, "grad_norm": 1.7839638341554918, "learning_rate": 1.0254077199226553e-07, "loss": 0.6172112822532654, "step": 5431 }, { "epoch": 1.9141850220264316, "grad_norm": 1.904067212081221, "learning_rate": 1.0171021484046806e-07, "loss": 0.5926263332366943, "step": 5432 }, { "epoch": 1.9145374449339208, "grad_norm": 1.7190787727179386, "learning_rate": 1.0088301785000754e-07, "loss": 0.6142431497573853, "step": 5433 }, { "epoch": 1.9148898678414097, "grad_norm": 1.7095738070807496, "learning_rate": 1.0005918130168934e-07, "loss": 0.5367780923843384, "step": 5434 }, { "epoch": 1.9152422907488986, "grad_norm": 1.8769142431022592, "learning_rate": 9.923870547518311e-08, "loss": 0.5241641998291016, "step": 5435 }, { "epoch": 1.9155947136563878, "grad_norm": 1.7765958549274539, "learning_rate": 9.842159064901157e-08, "loss": 0.5906308889389038, "step": 5436 }, { "epoch": 1.9159471365638767, "grad_norm": 2.1275572555046613, "learning_rate": 9.760783710056176e-08, "loss": 0.5411181449890137, "step": 5437 }, { "epoch": 1.9162995594713657, "grad_norm": 1.9001328464490854, "learning_rate": 9.679744510607825e-08, "loss": 0.6313618421554565, "step": 5438 }, { "epoch": 1.9166519823788546, "grad_norm": 2.0658646856716336, "learning_rate": 9.599041494066208e-08, "loss": 0.6330033540725708, "step": 5439 }, { "epoch": 1.9170044052863435, "grad_norm": 1.9617429681187768, "learning_rate": 9.518674687827634e-08, "loss": 0.5859507322311401, "step": 5440 }, { "epoch": 1.9173568281938325, "grad_norm": 1.9233196169731877, "learning_rate": 9.438644119174057e-08, "loss": 0.571119487285614, "step": 5441 }, { "epoch": 1.9177092511013216, "grad_norm": 1.683294616332208, "learning_rate": 9.3589498152733e-08, "loss": 0.6114518046379089, "step": 5442 }, { "epoch": 1.9180616740088106, "grad_norm": 2.0948221060814407, "learning_rate": 9.279591803179277e-08, "loss": 0.5762027502059937, "step": 5443 }, { "epoch": 1.9184140969162997, "grad_norm": 1.973540736612678, "learning_rate": 9.200570109831441e-08, "loss": 0.6081440448760986, "step": 5444 }, { "epoch": 1.9187665198237887, "grad_norm": 1.9242540837021294, "learning_rate": 9.121884762055222e-08, "loss": 0.5682440996170044, "step": 5445 }, { "epoch": 1.9191189427312776, "grad_norm": 1.642224199268087, "learning_rate": 9.043535786561919e-08, "loss": 0.5290100574493408, "step": 5446 }, { "epoch": 1.9194713656387665, "grad_norm": 1.8013641871034827, "learning_rate": 8.965523209948367e-08, "loss": 0.5743255019187927, "step": 5447 }, { "epoch": 1.9198237885462555, "grad_norm": 1.6357977481393366, "learning_rate": 8.887847058697718e-08, "loss": 0.5955618023872375, "step": 5448 }, { "epoch": 1.9201762114537444, "grad_norm": 1.9706217525454803, "learning_rate": 8.810507359178322e-08, "loss": 0.4732915759086609, "step": 5449 }, { "epoch": 1.9205286343612333, "grad_norm": 3.2730228664607797, "learning_rate": 8.733504137644621e-08, "loss": 0.6712108850479126, "step": 5450 }, { "epoch": 1.9208810572687225, "grad_norm": 1.997966446518774, "learning_rate": 8.656837420237152e-08, "loss": 0.5169811248779297, "step": 5451 }, { "epoch": 1.9212334801762114, "grad_norm": 1.9146732631772796, "learning_rate": 8.580507232981428e-08, "loss": 0.6117082238197327, "step": 5452 }, { "epoch": 1.9215859030837006, "grad_norm": 1.7690878518096709, "learning_rate": 8.504513601789388e-08, "loss": 0.7020283937454224, "step": 5453 }, { "epoch": 1.9219383259911895, "grad_norm": 1.806111695783304, "learning_rate": 8.42885655245862e-08, "loss": 0.5489979386329651, "step": 5454 }, { "epoch": 1.9222907488986785, "grad_norm": 1.8218906131330599, "learning_rate": 8.353536110672133e-08, "loss": 0.5361644625663757, "step": 5455 }, { "epoch": 1.9226431718061674, "grad_norm": 1.8728336665856926, "learning_rate": 8.278552301998921e-08, "loss": 0.6470010280609131, "step": 5456 }, { "epoch": 1.9229955947136563, "grad_norm": 1.5338046694887773, "learning_rate": 8.203905151893731e-08, "loss": 0.4642202854156494, "step": 5457 }, { "epoch": 1.9233480176211453, "grad_norm": 2.1878989180883357, "learning_rate": 8.129594685696852e-08, "loss": 0.6817516088485718, "step": 5458 }, { "epoch": 1.9237004405286342, "grad_norm": 1.7544221338170298, "learning_rate": 8.055620928634433e-08, "loss": 0.5748617649078369, "step": 5459 }, { "epoch": 1.9240528634361234, "grad_norm": 1.9928156109239001, "learning_rate": 7.981983905818281e-08, "loss": 0.6730939149856567, "step": 5460 }, { "epoch": 1.9244052863436123, "grad_norm": 1.665760800669473, "learning_rate": 7.90868364224584e-08, "loss": 0.46469685435295105, "step": 5461 }, { "epoch": 1.9247577092511015, "grad_norm": 2.0844638903136907, "learning_rate": 7.835720162800209e-08, "loss": 0.5633926391601562, "step": 5462 }, { "epoch": 1.9251101321585904, "grad_norm": 2.034693536740542, "learning_rate": 7.76309349225035e-08, "loss": 0.5813394784927368, "step": 5463 }, { "epoch": 1.9254625550660793, "grad_norm": 1.4118750743542163, "learning_rate": 7.690803655250656e-08, "loss": 0.39959418773651123, "step": 5464 }, { "epoch": 1.9258149779735683, "grad_norm": 1.7685280750016403, "learning_rate": 7.618850676341383e-08, "loss": 0.6136372089385986, "step": 5465 }, { "epoch": 1.9261674008810572, "grad_norm": 1.7393751984149959, "learning_rate": 7.547234579948104e-08, "loss": 0.6664354801177979, "step": 5466 }, { "epoch": 1.9265198237885461, "grad_norm": 1.8827898065352628, "learning_rate": 7.475955390382483e-08, "loss": 0.6009566783905029, "step": 5467 }, { "epoch": 1.9268722466960353, "grad_norm": 1.7872694267120686, "learning_rate": 7.405013131841499e-08, "loss": 0.7307299375534058, "step": 5468 }, { "epoch": 1.9272246696035242, "grad_norm": 1.8234703336391604, "learning_rate": 7.334407828407885e-08, "loss": 0.5459531545639038, "step": 5469 }, { "epoch": 1.9275770925110132, "grad_norm": 2.1252744976115583, "learning_rate": 7.264139504049916e-08, "loss": 0.6230820417404175, "step": 5470 }, { "epoch": 1.9279295154185023, "grad_norm": 1.6781926619362313, "learning_rate": 7.194208182621509e-08, "loss": 0.5282379984855652, "step": 5471 }, { "epoch": 1.9282819383259913, "grad_norm": 2.1980396503246604, "learning_rate": 7.12461388786212e-08, "loss": 0.626023530960083, "step": 5472 }, { "epoch": 1.9286343612334802, "grad_norm": 2.1608211937841197, "learning_rate": 7.055356643396849e-08, "loss": 0.6897492408752441, "step": 5473 }, { "epoch": 1.9289867841409691, "grad_norm": 1.7214187213722456, "learning_rate": 6.986436472736447e-08, "loss": 0.583849310874939, "step": 5474 }, { "epoch": 1.929339207048458, "grad_norm": 1.7492909983006562, "learning_rate": 6.917853399277197e-08, "loss": 0.6056735515594482, "step": 5475 }, { "epoch": 1.929691629955947, "grad_norm": 1.8166317563571888, "learning_rate": 6.849607446300699e-08, "loss": 0.52838134765625, "step": 5476 }, { "epoch": 1.9300440528634362, "grad_norm": 2.0425025849187954, "learning_rate": 6.781698636974532e-08, "loss": 0.6466653943061829, "step": 5477 }, { "epoch": 1.930396475770925, "grad_norm": 1.9593462888477349, "learning_rate": 6.714126994351589e-08, "loss": 0.6570286750793457, "step": 5478 }, { "epoch": 1.9307488986784143, "grad_norm": 2.4867358577799576, "learning_rate": 6.646892541370409e-08, "loss": 0.7303042411804199, "step": 5479 }, { "epoch": 1.9311013215859032, "grad_norm": 1.7938376915708092, "learning_rate": 6.579995300854846e-08, "loss": 0.5556488037109375, "step": 5480 }, { "epoch": 1.9314537444933921, "grad_norm": 1.9624740523274589, "learning_rate": 6.513435295514404e-08, "loss": 0.6673456430435181, "step": 5481 }, { "epoch": 1.931806167400881, "grad_norm": 1.9681067241776358, "learning_rate": 6.447212547944448e-08, "loss": 0.5605199337005615, "step": 5482 }, { "epoch": 1.93215859030837, "grad_norm": 2.1935053480556785, "learning_rate": 6.381327080625111e-08, "loss": 0.5455278158187866, "step": 5483 }, { "epoch": 1.932511013215859, "grad_norm": 1.8919678372461928, "learning_rate": 6.315778915922722e-08, "loss": 0.5371166467666626, "step": 5484 }, { "epoch": 1.9328634361233479, "grad_norm": 1.9114985069981878, "learning_rate": 6.250568076088814e-08, "loss": 0.5873486399650574, "step": 5485 }, { "epoch": 1.933215859030837, "grad_norm": 1.706006640351556, "learning_rate": 6.18569458326046e-08, "loss": 0.4187420606613159, "step": 5486 }, { "epoch": 1.933568281938326, "grad_norm": 1.900919435061996, "learning_rate": 6.121158459460042e-08, "loss": 0.6006373167037964, "step": 5487 }, { "epoch": 1.9339207048458151, "grad_norm": 1.819026585986156, "learning_rate": 6.056959726595702e-08, "loss": 0.6022043228149414, "step": 5488 }, { "epoch": 1.934273127753304, "grad_norm": 2.037720704211898, "learning_rate": 5.993098406460895e-08, "loss": 0.6324778199195862, "step": 5489 }, { "epoch": 1.934625550660793, "grad_norm": 2.0263189254585026, "learning_rate": 5.929574520734505e-08, "loss": 0.545529305934906, "step": 5490 }, { "epoch": 1.934977973568282, "grad_norm": 1.9957592171950855, "learning_rate": 5.8663880909809454e-08, "loss": 0.623627781867981, "step": 5491 }, { "epoch": 1.9353303964757709, "grad_norm": 1.9773130682504432, "learning_rate": 5.80353913865006e-08, "loss": 0.529983639717102, "step": 5492 }, { "epoch": 1.9356828193832598, "grad_norm": 1.8301905692374867, "learning_rate": 5.7410276850770055e-08, "loss": 0.638504147529602, "step": 5493 }, { "epoch": 1.9360352422907487, "grad_norm": 1.7706026455559263, "learning_rate": 5.678853751482694e-08, "loss": 0.6822696924209595, "step": 5494 }, { "epoch": 1.936387665198238, "grad_norm": 1.6924491917110376, "learning_rate": 5.6170173589730204e-08, "loss": 0.5454750061035156, "step": 5495 }, { "epoch": 1.9367400881057268, "grad_norm": 2.1428203564618915, "learning_rate": 5.555518528539638e-08, "loss": 0.5301260948181152, "step": 5496 }, { "epoch": 1.937092511013216, "grad_norm": 1.965552985899495, "learning_rate": 5.4943572810594035e-08, "loss": 0.697251558303833, "step": 5497 }, { "epoch": 1.937444933920705, "grad_norm": 1.8589631146352448, "learning_rate": 5.433533637294819e-08, "loss": 0.5171586871147156, "step": 5498 }, { "epoch": 1.9377973568281939, "grad_norm": 1.974708525019113, "learning_rate": 5.373047617893479e-08, "loss": 0.6006083488464355, "step": 5499 }, { "epoch": 1.9381497797356828, "grad_norm": 1.8914658578007237, "learning_rate": 5.312899243388403e-08, "loss": 0.6083849668502808, "step": 5500 }, { "epoch": 1.9385022026431717, "grad_norm": 2.189863186886587, "learning_rate": 5.2530885341982586e-08, "loss": 0.6572569608688354, "step": 5501 }, { "epoch": 1.9388546255506607, "grad_norm": 1.9316409138269541, "learning_rate": 5.1936155106269146e-08, "loss": 0.497112512588501, "step": 5502 }, { "epoch": 1.9392070484581496, "grad_norm": 1.9380736027791932, "learning_rate": 5.1344801928636664e-08, "loss": 0.5804885625839233, "step": 5503 }, { "epoch": 1.9395594713656388, "grad_norm": 2.415405306864913, "learning_rate": 5.075682600982901e-08, "loss": 0.6225712299346924, "step": 5504 }, { "epoch": 1.9399118942731277, "grad_norm": 1.896345547525062, "learning_rate": 5.017222754944651e-08, "loss": 0.6100028157234192, "step": 5505 }, { "epoch": 1.9402643171806169, "grad_norm": 1.47523556471349, "learning_rate": 4.959100674594486e-08, "loss": 0.549712061882019, "step": 5506 }, { "epoch": 1.9406167400881058, "grad_norm": 1.4736978929928604, "learning_rate": 4.901316379662624e-08, "loss": 0.5327162146568298, "step": 5507 }, { "epoch": 1.9409691629955947, "grad_norm": 2.3670974688739697, "learning_rate": 4.8438698897652626e-08, "loss": 0.7408417463302612, "step": 5508 }, { "epoch": 1.9413215859030837, "grad_norm": 1.8644826998816841, "learning_rate": 4.7867612244036906e-08, "loss": 0.6126288175582886, "step": 5509 }, { "epoch": 1.9416740088105726, "grad_norm": 1.9600730866036664, "learning_rate": 4.729990402964402e-08, "loss": 0.542537271976471, "step": 5510 }, { "epoch": 1.9420264317180616, "grad_norm": 1.9121979922913575, "learning_rate": 4.6735574447195345e-08, "loss": 0.5429843664169312, "step": 5511 }, { "epoch": 1.9423788546255507, "grad_norm": 1.8002113296979507, "learning_rate": 4.617462368826098e-08, "loss": 0.6103960275650024, "step": 5512 }, { "epoch": 1.9427312775330396, "grad_norm": 1.7389238607151303, "learning_rate": 4.561705194326749e-08, "loss": 0.43702462315559387, "step": 5513 }, { "epoch": 1.9430837004405286, "grad_norm": 1.7641081174281446, "learning_rate": 4.506285940149457e-08, "loss": 0.5313314199447632, "step": 5514 }, { "epoch": 1.9434361233480177, "grad_norm": 1.7069377243686814, "learning_rate": 4.451204625107064e-08, "loss": 0.568792462348938, "step": 5515 }, { "epoch": 1.9437885462555067, "grad_norm": 2.1007223606906185, "learning_rate": 4.3964612678979446e-08, "loss": 0.6055475473403931, "step": 5516 }, { "epoch": 1.9441409691629956, "grad_norm": 1.9436769148628141, "learning_rate": 4.3420558871060116e-08, "loss": 0.6203786730766296, "step": 5517 }, { "epoch": 1.9444933920704845, "grad_norm": 1.788437156743959, "learning_rate": 4.287988501200047e-08, "loss": 0.5914345979690552, "step": 5518 }, { "epoch": 1.9448458149779735, "grad_norm": 1.8745063002086186, "learning_rate": 4.2342591285343684e-08, "loss": 0.5650739669799805, "step": 5519 }, { "epoch": 1.9451982378854624, "grad_norm": 1.4561818985326163, "learning_rate": 4.180867787348164e-08, "loss": 0.5589660406112671, "step": 5520 }, { "epoch": 1.9455506607929516, "grad_norm": 1.9465775114906616, "learning_rate": 4.12781449576638e-08, "loss": 0.5683336853981018, "step": 5521 }, { "epoch": 1.9459030837004405, "grad_norm": 1.7869041316521455, "learning_rate": 4.075099271798943e-08, "loss": 0.5388365983963013, "step": 5522 }, { "epoch": 1.9462555066079297, "grad_norm": 2.3465100615160757, "learning_rate": 4.0227221333408726e-08, "loss": 0.575006365776062, "step": 5523 }, { "epoch": 1.9466079295154186, "grad_norm": 1.6872132733494793, "learning_rate": 3.970683098172723e-08, "loss": 0.49638503789901733, "step": 5524 }, { "epoch": 1.9469603524229075, "grad_norm": 2.095719754969683, "learning_rate": 3.9189821839600294e-08, "loss": 0.6484041213989258, "step": 5525 }, { "epoch": 1.9473127753303965, "grad_norm": 1.7587272240429226, "learning_rate": 3.8676194082537535e-08, "loss": 0.5522493124008179, "step": 5526 }, { "epoch": 1.9476651982378854, "grad_norm": 1.8834504959770908, "learning_rate": 3.8165947884898356e-08, "loss": 0.5875294208526611, "step": 5527 }, { "epoch": 1.9480176211453744, "grad_norm": 1.8990167388470667, "learning_rate": 3.765908341989644e-08, "loss": 0.5725122690200806, "step": 5528 }, { "epoch": 1.9483700440528633, "grad_norm": 1.7744908913216453, "learning_rate": 3.7155600859595243e-08, "loss": 0.5198935866355896, "step": 5529 }, { "epoch": 1.9487224669603525, "grad_norm": 1.8236927705658619, "learning_rate": 3.665550037491361e-08, "loss": 0.6396631598472595, "step": 5530 }, { "epoch": 1.9490748898678414, "grad_norm": 1.8879612013695581, "learning_rate": 3.6158782135617965e-08, "loss": 0.666089653968811, "step": 5531 }, { "epoch": 1.9494273127753305, "grad_norm": 1.9912413735248546, "learning_rate": 3.5665446310330087e-08, "loss": 0.6818836331367493, "step": 5532 }, { "epoch": 1.9497797356828195, "grad_norm": 2.04266783813749, "learning_rate": 3.517549306652157e-08, "loss": 0.533860981464386, "step": 5533 }, { "epoch": 1.9501321585903084, "grad_norm": 2.011493253926506, "learning_rate": 3.468892257051493e-08, "loss": 0.6174973249435425, "step": 5534 }, { "epoch": 1.9504845814977974, "grad_norm": 2.07102768257305, "learning_rate": 3.4205734987488027e-08, "loss": 0.6010403037071228, "step": 5535 }, { "epoch": 1.9508370044052863, "grad_norm": 1.8654722728182422, "learning_rate": 3.372593048146744e-08, "loss": 0.6475502252578735, "step": 5536 }, { "epoch": 1.9511894273127752, "grad_norm": 2.080853183455891, "learning_rate": 3.3249509215330653e-08, "loss": 0.5625165700912476, "step": 5537 }, { "epoch": 1.9515418502202642, "grad_norm": 2.0303262611818336, "learning_rate": 3.277647135080941e-08, "loss": 0.6504719257354736, "step": 5538 }, { "epoch": 1.9518942731277533, "grad_norm": 1.7964243534988884, "learning_rate": 3.230681704848415e-08, "loss": 0.6217454671859741, "step": 5539 }, { "epoch": 1.9522466960352423, "grad_norm": 1.975881803401868, "learning_rate": 3.1840546467788445e-08, "loss": 0.5804678201675415, "step": 5540 }, { "epoch": 1.9525991189427314, "grad_norm": 1.7644690968017507, "learning_rate": 3.1377659767006795e-08, "loss": 0.6133759617805481, "step": 5541 }, { "epoch": 1.9529515418502204, "grad_norm": 1.736020484111057, "learning_rate": 3.0918157103273506e-08, "loss": 0.508539080619812, "step": 5542 }, { "epoch": 1.9533039647577093, "grad_norm": 2.115379893074018, "learning_rate": 3.0462038632577126e-08, "loss": 0.5682996511459351, "step": 5543 }, { "epoch": 1.9536563876651982, "grad_norm": 2.0360556708735276, "learning_rate": 3.000930450975603e-08, "loss": 0.7072808742523193, "step": 5544 }, { "epoch": 1.9540088105726872, "grad_norm": 2.092981328238059, "learning_rate": 2.9559954888497278e-08, "loss": 0.5948976278305054, "step": 5545 }, { "epoch": 1.954361233480176, "grad_norm": 1.827038503098094, "learning_rate": 2.911398992134218e-08, "loss": 0.5111032128334045, "step": 5546 }, { "epoch": 1.954713656387665, "grad_norm": 1.8278152391313893, "learning_rate": 2.8671409759681858e-08, "loss": 0.553802490234375, "step": 5547 }, { "epoch": 1.9550660792951542, "grad_norm": 1.685843539181356, "learning_rate": 2.8232214553759462e-08, "loss": 0.5091711282730103, "step": 5548 }, { "epoch": 1.9554185022026431, "grad_norm": 1.4871983076237012, "learning_rate": 2.7796404452666847e-08, "loss": 0.47025251388549805, "step": 5549 }, { "epoch": 1.9557709251101323, "grad_norm": 2.031516899140332, "learning_rate": 2.7363979604349e-08, "loss": 0.6174348592758179, "step": 5550 }, { "epoch": 1.9561233480176212, "grad_norm": 1.8902471541583934, "learning_rate": 2.69349401555985e-08, "loss": 0.5516685247421265, "step": 5551 }, { "epoch": 1.9564757709251102, "grad_norm": 2.1329834880360563, "learning_rate": 2.6509286252063282e-08, "loss": 0.6272131204605103, "step": 5552 }, { "epoch": 1.956828193832599, "grad_norm": 1.796045915873636, "learning_rate": 2.6087018038239987e-08, "loss": 0.5913189649581909, "step": 5553 }, { "epoch": 1.957180616740088, "grad_norm": 1.8863252927172953, "learning_rate": 2.5668135657472835e-08, "loss": 0.6802668571472168, "step": 5554 }, { "epoch": 1.957533039647577, "grad_norm": 1.9442650959080303, "learning_rate": 2.525263925196142e-08, "loss": 0.5829865336418152, "step": 5555 }, { "epoch": 1.9578854625550661, "grad_norm": 2.0474932427098627, "learning_rate": 2.4840528962752907e-08, "loss": 0.6400870680809021, "step": 5556 }, { "epoch": 1.958237885462555, "grad_norm": 2.043080792800152, "learning_rate": 2.4431804929746506e-08, "loss": 0.48432302474975586, "step": 5557 }, { "epoch": 1.958590308370044, "grad_norm": 1.9639599818265998, "learning_rate": 2.4026467291691223e-08, "loss": 0.5494402647018433, "step": 5558 }, { "epoch": 1.9589427312775332, "grad_norm": 1.800709765694371, "learning_rate": 2.3624516186186996e-08, "loss": 0.5393223762512207, "step": 5559 }, { "epoch": 1.959295154185022, "grad_norm": 1.868096905678952, "learning_rate": 2.322595174968245e-08, "loss": 0.5500867962837219, "step": 5560 }, { "epoch": 1.959647577092511, "grad_norm": 1.986290631971783, "learning_rate": 2.283077411747825e-08, "loss": 0.5618818998336792, "step": 5561 }, { "epoch": 1.96, "grad_norm": 2.0174876429391526, "learning_rate": 2.243898342372597e-08, "loss": 0.5681769251823425, "step": 5562 }, { "epoch": 1.960352422907489, "grad_norm": 2.160298007931608, "learning_rate": 2.2050579801424777e-08, "loss": 0.8009706139564514, "step": 5563 }, { "epoch": 1.9607048458149778, "grad_norm": 2.2076681264311517, "learning_rate": 2.1665563382426978e-08, "loss": 0.5609455704689026, "step": 5564 }, { "epoch": 1.961057268722467, "grad_norm": 1.6584397285315808, "learning_rate": 2.1283934297432472e-08, "loss": 0.5615163445472717, "step": 5565 }, { "epoch": 1.961409691629956, "grad_norm": 2.4819954064616265, "learning_rate": 2.0905692675993182e-08, "loss": 0.4442581832408905, "step": 5566 }, { "epoch": 1.961762114537445, "grad_norm": 2.0037139303731344, "learning_rate": 2.0530838646510842e-08, "loss": 0.6557266116142273, "step": 5567 }, { "epoch": 1.962114537444934, "grad_norm": 1.851215643338071, "learning_rate": 2.0159372336235884e-08, "loss": 0.5911799669265747, "step": 5568 }, { "epoch": 1.962466960352423, "grad_norm": 2.0920087166052057, "learning_rate": 1.9791293871269656e-08, "loss": 0.5480202436447144, "step": 5569 }, { "epoch": 1.962819383259912, "grad_norm": 2.0350633249337795, "learning_rate": 1.9426603376563325e-08, "loss": 0.6489467620849609, "step": 5570 }, { "epoch": 1.9631718061674008, "grad_norm": 1.8480180634522771, "learning_rate": 1.9065300975917856e-08, "loss": 0.4699944853782654, "step": 5571 }, { "epoch": 1.9635242290748898, "grad_norm": 1.8923901172350763, "learning_rate": 1.8707386791985137e-08, "loss": 0.6684885025024414, "step": 5572 }, { "epoch": 1.9638766519823787, "grad_norm": 2.2169126358939413, "learning_rate": 1.835286094626576e-08, "loss": 0.5847122073173523, "step": 5573 }, { "epoch": 1.9642290748898679, "grad_norm": 1.801041360244202, "learning_rate": 1.8001723559109007e-08, "loss": 0.5427859425544739, "step": 5574 }, { "epoch": 1.9645814977973568, "grad_norm": 2.032431019918, "learning_rate": 1.7653974749715087e-08, "loss": 0.6545590758323669, "step": 5575 }, { "epoch": 1.964933920704846, "grad_norm": 1.785624619961358, "learning_rate": 1.730961463613512e-08, "loss": 0.6369475722312927, "step": 5576 }, { "epoch": 1.965286343612335, "grad_norm": 1.989892215094852, "learning_rate": 1.696864333526893e-08, "loss": 0.5165325403213501, "step": 5577 }, { "epoch": 1.9656387665198238, "grad_norm": 2.147184198038496, "learning_rate": 1.6631060962863933e-08, "loss": 0.5651812553405762, "step": 5578 }, { "epoch": 1.9659911894273128, "grad_norm": 1.6839108762220567, "learning_rate": 1.6296867633519563e-08, "loss": 0.5249905586242676, "step": 5579 }, { "epoch": 1.9663436123348017, "grad_norm": 1.8723453129570697, "learning_rate": 1.5966063460683967e-08, "loss": 0.6748663783073425, "step": 5580 }, { "epoch": 1.9666960352422906, "grad_norm": 1.654472064493344, "learning_rate": 1.5638648556656198e-08, "loss": 0.5276468992233276, "step": 5581 }, { "epoch": 1.9670484581497796, "grad_norm": 1.7910399914217132, "learning_rate": 1.5314623032581798e-08, "loss": 0.5778729319572449, "step": 5582 }, { "epoch": 1.9674008810572687, "grad_norm": 1.8564203677999862, "learning_rate": 1.4993986998457223e-08, "loss": 0.5805479288101196, "step": 5583 }, { "epoch": 1.9677533039647577, "grad_norm": 1.9817945876697571, "learning_rate": 1.4676740563129843e-08, "loss": 0.6213263273239136, "step": 5584 }, { "epoch": 1.9681057268722468, "grad_norm": 1.8037978918771924, "learning_rate": 1.4362883834294627e-08, "loss": 0.5081031322479248, "step": 5585 }, { "epoch": 1.9684581497797358, "grad_norm": 1.71465121106617, "learning_rate": 1.4052416918495237e-08, "loss": 0.5605350136756897, "step": 5586 }, { "epoch": 1.9688105726872247, "grad_norm": 2.1182297496689877, "learning_rate": 1.3745339921126255e-08, "loss": 0.701635479927063, "step": 5587 }, { "epoch": 1.9691629955947136, "grad_norm": 4.768978361346767, "learning_rate": 1.344165294642985e-08, "loss": 0.5537668466567993, "step": 5588 }, { "epoch": 1.9695154185022026, "grad_norm": 1.9636754875619487, "learning_rate": 1.3141356097500225e-08, "loss": 0.6395033597946167, "step": 5589 }, { "epoch": 1.9698678414096915, "grad_norm": 2.0129419054377355, "learning_rate": 1.2844449476276943e-08, "loss": 0.549985408782959, "step": 5590 }, { "epoch": 1.9702202643171807, "grad_norm": 1.5684457658919975, "learning_rate": 1.2550933183550496e-08, "loss": 0.4503220021724701, "step": 5591 }, { "epoch": 1.9705726872246696, "grad_norm": 1.842567825609057, "learning_rate": 1.2260807318962286e-08, "loss": 0.6369946599006653, "step": 5592 }, { "epoch": 1.9709251101321585, "grad_norm": 1.9389149649481725, "learning_rate": 1.197407198099909e-08, "loss": 0.547295093536377, "step": 5593 }, { "epoch": 1.9712775330396477, "grad_norm": 1.9379429852476115, "learning_rate": 1.1690727267000823e-08, "loss": 0.578770101070404, "step": 5594 }, { "epoch": 1.9716299559471366, "grad_norm": 1.7979041690440398, "learning_rate": 1.1410773273151654e-08, "loss": 0.5992920398712158, "step": 5595 }, { "epoch": 1.9719823788546256, "grad_norm": 2.0358089708846503, "learning_rate": 1.1134210094488896e-08, "loss": 0.5912446975708008, "step": 5596 }, { "epoch": 1.9723348017621145, "grad_norm": 1.9956728807231137, "learning_rate": 1.0861037824896337e-08, "loss": 0.6539223194122314, "step": 5597 }, { "epoch": 1.9726872246696034, "grad_norm": 1.6995757910859364, "learning_rate": 1.0591256557108686e-08, "loss": 0.6487923860549927, "step": 5598 }, { "epoch": 1.9730396475770924, "grad_norm": 2.0265831695223384, "learning_rate": 1.0324866382707133e-08, "loss": 0.7950254678726196, "step": 5599 }, { "epoch": 1.9733920704845815, "grad_norm": 1.7028165277673737, "learning_rate": 1.006186739212267e-08, "loss": 0.4941173195838928, "step": 5600 }, { "epoch": 1.9737444933920705, "grad_norm": 1.8542643380709567, "learning_rate": 9.802259674637215e-09, "loss": 0.6733928322792053, "step": 5601 }, { "epoch": 1.9740969162995596, "grad_norm": 1.7591584352828642, "learning_rate": 9.546043318376941e-09, "loss": 0.5084437131881714, "step": 5602 }, { "epoch": 1.9744493392070486, "grad_norm": 1.7908335232844454, "learning_rate": 9.293218410320049e-09, "loss": 0.4499536156654358, "step": 5603 }, { "epoch": 1.9748017621145375, "grad_norm": 1.8930332249062705, "learning_rate": 9.04378503629344e-09, "loss": 0.557701826095581, "step": 5604 }, { "epoch": 1.9751541850220264, "grad_norm": 1.8335406987256675, "learning_rate": 8.797743280972715e-09, "loss": 0.6110183000564575, "step": 5605 }, { "epoch": 1.9755066079295154, "grad_norm": 1.7153205510430745, "learning_rate": 8.555093227878842e-09, "loss": 0.5877780318260193, "step": 5606 }, { "epoch": 1.9758590308370043, "grad_norm": 1.679841288677745, "learning_rate": 8.315834959385927e-09, "loss": 0.48567962646484375, "step": 5607 }, { "epoch": 1.9762114537444933, "grad_norm": 1.8860795135087454, "learning_rate": 8.079968556714557e-09, "loss": 0.5536524653434753, "step": 5608 }, { "epoch": 1.9765638766519824, "grad_norm": 1.8444573855158568, "learning_rate": 7.847494099934017e-09, "loss": 0.7685257196426392, "step": 5609 }, { "epoch": 1.9769162995594713, "grad_norm": 1.6340535068378546, "learning_rate": 7.618411667961179e-09, "loss": 0.5442079305648804, "step": 5610 }, { "epoch": 1.9772687224669605, "grad_norm": 1.6630624172257082, "learning_rate": 7.392721338563835e-09, "loss": 0.5034504532814026, "step": 5611 }, { "epoch": 1.9776211453744494, "grad_norm": 1.8322008815729305, "learning_rate": 7.1704231883551465e-09, "loss": 0.4912964701652527, "step": 5612 }, { "epoch": 1.9779735682819384, "grad_norm": 2.134310001014161, "learning_rate": 6.951517292800303e-09, "loss": 0.6034345626831055, "step": 5613 }, { "epoch": 1.9783259911894273, "grad_norm": 1.7043803614532214, "learning_rate": 6.736003726209861e-09, "loss": 0.5379009246826172, "step": 5614 }, { "epoch": 1.9786784140969162, "grad_norm": 1.8487957346880508, "learning_rate": 6.523882561744188e-09, "loss": 0.6571087837219238, "step": 5615 }, { "epoch": 1.9790308370044052, "grad_norm": 1.9626526350308011, "learning_rate": 6.315153871411239e-09, "loss": 0.6473923921585083, "step": 5616 }, { "epoch": 1.9793832599118941, "grad_norm": 1.908714917292982, "learning_rate": 6.1098177260687786e-09, "loss": 0.5877989530563354, "step": 5617 }, { "epoch": 1.9797356828193833, "grad_norm": 2.11970631488856, "learning_rate": 5.907874195422159e-09, "loss": 0.5058172941207886, "step": 5618 }, { "epoch": 1.9800881057268722, "grad_norm": 2.0101537086675143, "learning_rate": 5.70932334802432e-09, "loss": 0.6471046805381775, "step": 5619 }, { "epoch": 1.9804405286343614, "grad_norm": 1.9865988373367267, "learning_rate": 5.514165251276904e-09, "loss": 0.6687172651290894, "step": 5620 }, { "epoch": 1.9807929515418503, "grad_norm": 1.8269412184920357, "learning_rate": 5.322399971431358e-09, "loss": 0.5726118087768555, "step": 5621 }, { "epoch": 1.9811453744493392, "grad_norm": 2.247520087423374, "learning_rate": 5.134027573584499e-09, "loss": 0.6534412503242493, "step": 5622 }, { "epoch": 1.9814977973568282, "grad_norm": 1.9962084272761849, "learning_rate": 4.949048121682953e-09, "loss": 0.5972425937652588, "step": 5623 }, { "epoch": 1.9818502202643171, "grad_norm": 2.13007133485212, "learning_rate": 4.767461678522045e-09, "loss": 0.6420427560806274, "step": 5624 }, { "epoch": 1.982202643171806, "grad_norm": 2.0162772178768513, "learning_rate": 4.589268305745798e-09, "loss": 0.5912461876869202, "step": 5625 }, { "epoch": 1.982555066079295, "grad_norm": 1.8724213041645918, "learning_rate": 4.414468063843602e-09, "loss": 0.5942744016647339, "step": 5626 }, { "epoch": 1.9829074889867842, "grad_norm": 2.156440148804406, "learning_rate": 4.243061012154659e-09, "loss": 0.6618138551712036, "step": 5627 }, { "epoch": 1.983259911894273, "grad_norm": 1.949825469014612, "learning_rate": 4.075047208867977e-09, "loss": 0.6046779155731201, "step": 5628 }, { "epoch": 1.9836123348017622, "grad_norm": 1.8484056273808063, "learning_rate": 3.9104267110168235e-09, "loss": 0.6797989010810852, "step": 5629 }, { "epoch": 1.9839647577092512, "grad_norm": 1.5948446953630264, "learning_rate": 3.749199574486495e-09, "loss": 0.4882436692714691, "step": 5630 }, { "epoch": 1.9843171806167401, "grad_norm": 2.0513829030138324, "learning_rate": 3.591365854008766e-09, "loss": 0.6694678068161011, "step": 5631 }, { "epoch": 1.984669603524229, "grad_norm": 1.9290678063690434, "learning_rate": 3.436925603161889e-09, "loss": 0.6015830039978027, "step": 5632 }, { "epoch": 1.985022026431718, "grad_norm": 1.6208579673883046, "learning_rate": 3.2858788743739267e-09, "loss": 0.5627756118774414, "step": 5633 }, { "epoch": 1.985374449339207, "grad_norm": 2.0314486190919836, "learning_rate": 3.138225718920529e-09, "loss": 0.6069298386573792, "step": 5634 }, { "epoch": 1.985726872246696, "grad_norm": 1.742509198855919, "learning_rate": 2.993966186926045e-09, "loss": 0.5779693722724915, "step": 5635 }, { "epoch": 1.986079295154185, "grad_norm": 2.1341372247717914, "learning_rate": 2.8531003273624126e-09, "loss": 0.6706609725952148, "step": 5636 }, { "epoch": 1.986431718061674, "grad_norm": 1.8945369145507158, "learning_rate": 2.715628188046937e-09, "loss": 0.6361640691757202, "step": 5637 }, { "epoch": 1.9867841409691631, "grad_norm": 1.5755870938483592, "learning_rate": 2.581549815648954e-09, "loss": 0.557577908039093, "step": 5638 }, { "epoch": 1.987136563876652, "grad_norm": 1.9370315563575715, "learning_rate": 2.450865255684276e-09, "loss": 0.7735704183578491, "step": 5639 }, { "epoch": 1.987488986784141, "grad_norm": 1.7912792802398185, "learning_rate": 2.3235745525151956e-09, "loss": 0.5836409330368042, "step": 5640 }, { "epoch": 1.98784140969163, "grad_norm": 2.1774628342963616, "learning_rate": 2.1996777493527023e-09, "loss": 0.5824601650238037, "step": 5641 }, { "epoch": 1.9881938325991189, "grad_norm": 1.9237771552425715, "learning_rate": 2.0791748882575958e-09, "loss": 0.6183140873908997, "step": 5642 }, { "epoch": 1.9885462555066078, "grad_norm": 2.303117222732587, "learning_rate": 1.9620660101349333e-09, "loss": 0.6071987748146057, "step": 5643 }, { "epoch": 1.988898678414097, "grad_norm": 1.884829146752756, "learning_rate": 1.8483511547406907e-09, "loss": 0.655383825302124, "step": 5644 }, { "epoch": 1.989251101321586, "grad_norm": 2.0011243386948117, "learning_rate": 1.738030360677323e-09, "loss": 0.6328674554824829, "step": 5645 }, { "epoch": 1.989603524229075, "grad_norm": 1.7353302673505981, "learning_rate": 1.631103665394873e-09, "loss": 0.5012212991714478, "step": 5646 }, { "epoch": 1.989955947136564, "grad_norm": 1.911618533436513, "learning_rate": 1.5275711051909724e-09, "loss": 0.6202536821365356, "step": 5647 }, { "epoch": 1.990308370044053, "grad_norm": 1.7599160794894961, "learning_rate": 1.427432715214172e-09, "loss": 0.4922720789909363, "step": 5648 }, { "epoch": 1.9906607929515419, "grad_norm": 2.21375034225685, "learning_rate": 1.33068852945617e-09, "loss": 0.6591637134552002, "step": 5649 }, { "epoch": 1.9910132158590308, "grad_norm": 2.0468816346516165, "learning_rate": 1.2373385807584736e-09, "loss": 0.5481886863708496, "step": 5650 }, { "epoch": 1.9913656387665197, "grad_norm": 1.8773578553009866, "learning_rate": 1.1473829008123994e-09, "loss": 0.5642685890197754, "step": 5651 }, { "epoch": 1.9917180616740087, "grad_norm": 2.25781450338385, "learning_rate": 1.060821520153521e-09, "loss": 0.6736876368522644, "step": 5652 }, { "epoch": 1.9920704845814978, "grad_norm": 1.9047917173058142, "learning_rate": 9.776544681672218e-10, "loss": 0.6823733448982239, "step": 5653 }, { "epoch": 1.9924229074889868, "grad_norm": 1.9405145779165673, "learning_rate": 8.978817730864731e-10, "loss": 0.4686351716518402, "step": 5654 }, { "epoch": 1.992775330396476, "grad_norm": 1.8195643517733058, "learning_rate": 8.215034619907247e-10, "loss": 0.5027543306350708, "step": 5655 }, { "epoch": 1.9931277533039649, "grad_norm": 1.8439113550188309, "learning_rate": 7.485195608081253e-10, "loss": 0.6217285394668579, "step": 5656 }, { "epoch": 1.9934801762114538, "grad_norm": 1.6691057045729332, "learning_rate": 6.78930094315522e-10, "loss": 0.5942907929420471, "step": 5657 }, { "epoch": 1.9938325991189427, "grad_norm": 2.096767040436613, "learning_rate": 6.127350861351299e-10, "loss": 0.7282885313034058, "step": 5658 }, { "epoch": 1.9941850220264317, "grad_norm": 1.7781855320052453, "learning_rate": 5.499345587389737e-10, "loss": 0.653915286064148, "step": 5659 }, { "epoch": 1.9945374449339206, "grad_norm": 2.223388921265014, "learning_rate": 4.905285334455556e-10, "loss": 0.6993501782417297, "step": 5660 }, { "epoch": 1.9948898678414095, "grad_norm": 1.6333458377223884, "learning_rate": 4.3451703042207694e-10, "loss": 0.712554931640625, "step": 5661 }, { "epoch": 1.9952422907488987, "grad_norm": 2.0241768111751686, "learning_rate": 3.81900068681107e-10, "loss": 0.7523812055587769, "step": 5662 }, { "epoch": 1.9955947136563876, "grad_norm": 1.787839297148447, "learning_rate": 3.3267766608502395e-10, "loss": 0.5138256549835205, "step": 5663 }, { "epoch": 1.9959471365638768, "grad_norm": 1.9006089009269762, "learning_rate": 2.8684983934490486e-10, "loss": 0.6154034733772278, "step": 5664 }, { "epoch": 1.9962995594713657, "grad_norm": 2.0640894330103623, "learning_rate": 2.4441660401608447e-10, "loss": 0.5790190696716309, "step": 5665 }, { "epoch": 1.9966519823788547, "grad_norm": 2.000808545557318, "learning_rate": 2.0537797450370657e-10, "loss": 0.6873353719711304, "step": 5666 }, { "epoch": 1.9970044052863436, "grad_norm": 2.2263273319791583, "learning_rate": 1.6973396405939312e-10, "loss": 0.5764753222465515, "step": 5667 }, { "epoch": 1.9973568281938325, "grad_norm": 1.9150199338130098, "learning_rate": 1.374845847856854e-10, "loss": 0.5144297480583191, "step": 5668 }, { "epoch": 1.9977092511013215, "grad_norm": 2.1042696631217415, "learning_rate": 1.0862984762716189e-10, "loss": 0.5934832692146301, "step": 5669 }, { "epoch": 1.9980616740088104, "grad_norm": 1.7802660124503475, "learning_rate": 8.316976238154084e-11, "loss": 0.4544188976287842, "step": 5670 }, { "epoch": 1.9984140969162996, "grad_norm": 1.88553614744617, "learning_rate": 6.110433769079827e-11, "loss": 0.44844698905944824, "step": 5671 }, { "epoch": 1.9987665198237885, "grad_norm": 1.7574547722102742, "learning_rate": 4.2433581045608905e-11, "loss": 0.5272520780563354, "step": 5672 }, { "epoch": 1.9991189427312777, "grad_norm": 1.8300265717895403, "learning_rate": 2.715749878312579e-11, "loss": 0.5003396272659302, "step": 5673 }, { "epoch": 1.9994713656387666, "grad_norm": 2.047162707278085, "learning_rate": 1.5276096090310887e-11, "loss": 0.561710000038147, "step": 5674 }, { "epoch": 1.9998237885462555, "grad_norm": 1.8995125757935345, "learning_rate": 6.789377000604447e-12, "loss": 0.666955292224884, "step": 5675 }, { "epoch": 2.0, "grad_norm": 4.048422061559424, "learning_rate": 1.6973443939249934e-12, "loss": 0.7278814911842346, "step": 5676 } ], "logging_steps": 1, "max_steps": 5676, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1754791774076928.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }