File size: 20,862 Bytes
2b74354 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | {"epoch": 0, "train/lr": 1.2502400076802458e-05, "train/grad": 0.05754596568256617, "train/loss": 0.9929917604064942, "eval/hcp-train-subset/loss": 0.9908324720398072, "eval/hcp-val/loss": 0.9896745018420681}
{"epoch": 1, "train/lr": 3.750320010240327e-05, "train/grad": 0.0795222518724203, "train/loss": 0.9883668514251709, "eval/hcp-train-subset/loss": 0.9893307714693008, "eval/hcp-val/loss": 0.9877870294355577}
{"epoch": 2, "train/lr": 6.250400012800409e-05, "train/grad": 0.1945328373699264, "train/loss": 0.9832695654296875, "eval/hcp-train-subset/loss": 0.9798741417546426, "eval/hcp-val/loss": 0.9769055574171005}
{"epoch": 3, "train/lr": 8.75048001536049e-05, "train/grad": 0.2840052360839628, "train/loss": 0.9652900949478149, "eval/hcp-train-subset/loss": 0.9384053339881282, "eval/hcp-val/loss": 0.9344209307624448}
{"epoch": 4, "train/lr": 0.00011250559953918529, "train/grad": 0.30975041896992156, "train/loss": 0.9198785839653015, "eval/hcp-train-subset/loss": 0.9142401987506498, "eval/hcp-val/loss": 0.9094922850208897}
{"epoch": 5, "train/lr": 0.00012498860637884563, "train/grad": 0.2214050414636922, "train/loss": 0.8993835213184357, "eval/hcp-train-subset/loss": 0.8861794433286113, "eval/hcp-val/loss": 0.8808952685325376}
{"epoch": 6, "train/lr": 0.0001249202705377922, "train/grad": 0.16273339455124472, "train/loss": 0.8715805316543579, "eval/hcp-train-subset/loss": 0.8667611383622692, "eval/hcp-val/loss": 0.8613550355357509}
{"epoch": 7, "train/lr": 0.0001247836790473516, "train/grad": 0.11648855486195463, "train/loss": 0.8557996199417114, "eval/hcp-train-subset/loss": 0.858945365874998, "eval/hcp-val/loss": 0.8533292431985179}
{"epoch": 8, "train/lr": 0.000124578981268311, "train/grad": 0.0925069770662283, "train/loss": 0.851636312456131, "eval/hcp-train-subset/loss": 0.8528163490756866, "eval/hcp-val/loss": 0.8472179087900347}
{"epoch": 9, "train/lr": 0.00012430640103468907, "train/grad": 0.08379650564265453, "train/loss": 0.8464763094043731, "eval/hcp-train-subset/loss": 0.850514562860612, "eval/hcp-val/loss": 0.845460501409346}
{"epoch": 10, "train/lr": 0.00012396623640896796, "train/grad": 0.07790791605707739, "train/loss": 0.8424201095771789, "eval/hcp-train-subset/loss": 0.8476696745041878, "eval/hcp-val/loss": 0.8425735187145972}
{"epoch": 11, "train/lr": 0.0001235588593561712, "train/grad": 0.07472675903060184, "train/loss": 0.8397171366691589, "eval/hcp-train-subset/loss": 0.8457868993282318, "eval/hcp-val/loss": 0.8407986077570146}
{"epoch": 12, "train/lr": 0.00012308471533712604, "train/grad": 0.07270870174886972, "train/loss": 0.8369358117008209, "eval/hcp-train-subset/loss": 0.8441020759844011, "eval/hcp-val/loss": 0.8386207021051838}
{"epoch": 13, "train/lr": 0.00012254432282135565, "train/grad": 0.07313090999196255, "train/loss": 0.834054965429306, "eval/hcp-train-subset/loss": 0.8427325208340922, "eval/hcp-val/loss": 0.8376958860505012}
{"epoch": 14, "train/lr": 0.00012193827272014171, "train/grad": 0.0718415184410139, "train/loss": 0.8342392716026306, "eval/hcp-train-subset/loss": 0.8429064289216073, "eval/hcp-val/loss": 0.8379386932619156}
{"epoch": 15, "train/lr": 0.00012126722774037197, "train/grad": 0.07277538045510779, "train/loss": 0.8316751469135284, "eval/hcp-train-subset/loss": 0.8394100473773095, "eval/hcp-val/loss": 0.8355350648203204}
{"epoch": 16, "train/lr": 0.00012053192165988122, "train/grad": 0.07349545297708143, "train/loss": 0.8296599831962586, "eval/hcp-train-subset/loss": 0.8398560285568237, "eval/hcp-val/loss": 0.8358489419183424}
{"epoch": 17, "train/lr": 0.00011973315852507104, "train/grad": 0.07367440152236006, "train/loss": 0.8289999080085755, "eval/hcp-train-subset/loss": 0.8386557409840245, "eval/hcp-val/loss": 0.8352220712169525}
{"epoch": 18, "train/lr": 0.00011887181177170142, "train/grad": 0.07391339711000379, "train/loss": 0.8273939764499665, "eval/hcp-train-subset/loss": 0.8386728398261531, "eval/hcp-val/loss": 0.8342263794714405}
{"epoch": 19, "train/lr": 0.00011794882326980209, "train/grad": 0.07617950705161872, "train/loss": 0.8263471906852722, "eval/hcp-train-subset/loss": 0.837996284807882, "eval/hcp-val/loss": 0.833749545197333}
{"epoch": 20, "train/lr": 0.00011696520229374954, "train/grad": 0.07761983892006297, "train/loss": 0.8251158785915375, "eval/hcp-train-subset/loss": 0.8377855506635481, "eval/hcp-val/loss": 0.8334957495812447}
{"epoch": 21, "train/lr": 0.00011592202441863837, "train/grad": 0.0776796886602923, "train/loss": 0.8239383276653289, "eval/hcp-train-subset/loss": 0.8373822887097636, "eval/hcp-val/loss": 0.8327403635747971}
{"epoch": 22, "train/lr": 0.00011482043034415979, "train/grad": 0.0815553960483402, "train/loss": 0.8203559807395935, "eval/hcp-train-subset/loss": 0.8350278437137604, "eval/hcp-val/loss": 0.8322967204355425}
{"epoch": 23, "train/lr": 0.00011366162464726024, "train/grad": 0.08047081179390893, "train/loss": 0.8225513026046752, "eval/hcp-train-subset/loss": 0.8354697535114903, "eval/hcp-val/loss": 0.8321326567280677}
{"epoch": 24, "train/lr": 0.0001124468744649569, "train/grad": 0.082121794519471, "train/loss": 0.8211892210769653, "eval/hcp-train-subset/loss": 0.8343725406354473, "eval/hcp-val/loss": 0.8314438250757032}
{"epoch": 25, "train/lr": 0.0001111775081087387, "train/grad": 0.08442137755868567, "train/loss": 0.8178489223957062, "eval/hcp-train-subset/loss": 0.8339378631884052, "eval/hcp-val/loss": 0.8306498325640156}
{"epoch": 26, "train/lr": 0.0001098549136120796, "train/grad": 0.08426846920897611, "train/loss": 0.8196334777927399, "eval/hcp-train-subset/loss": 0.8338449953063842, "eval/hcp-val/loss": 0.8307090295899299}
{"epoch": 27, "train/lr": 0.00010848053721264312, "train/grad": 0.08750328999492603, "train/loss": 0.8148395973873138, "eval/hcp-train-subset/loss": 0.8338441406526873, "eval/hcp-val/loss": 0.8300043238747504}
{"epoch": 28, "train/lr": 0.00010705588177084458, "train/grad": 0.08694597372431287, "train/loss": 0.8164607980728149, "eval/hcp-train-subset/loss": 0.8350607522072331, "eval/hcp-val/loss": 0.8298855808473402}
{"epoch": 29, "train/lr": 0.00010558250512649171, "train/grad": 0.08942557683437276, "train/loss": 0.8146612830543518, "eval/hcp-train-subset/loss": 0.8341269637307813, "eval/hcp-val/loss": 0.8294133207490367}
{"epoch": 30, "train/lr": 0.00010406201839531515, "train/grad": 0.09043404628688448, "train/loss": 0.8153467364597321, "eval/hcp-train-subset/loss": 0.8328581663870043, "eval/hcp-val/loss": 0.8298058452144745}
{"epoch": 31, "train/lr": 0.00010249608420723018, "train/grad": 0.09030556146451361, "train/loss": 0.8143413386726379, "eval/hcp-train-subset/loss": 0.8309686203156749, "eval/hcp-val/loss": 0.8295429868082846}
{"epoch": 32, "train/lr": 0.00010088641488828097, "train/grad": 0.09212302988565348, "train/loss": 0.8121198905754089, "eval/hcp-train-subset/loss": 0.8326694302020534, "eval/hcp-val/loss": 0.8292358344601046}
{"epoch": 33, "train/lr": 9.923477058823526e-05, "train/grad": 0.09443310714683349, "train/loss": 0.8114089354515076, "eval/hcp-train-subset/loss": 0.8282332122325897, "eval/hcp-val/loss": 0.8282913786749686}
{"epoch": 34, "train/lr": 9.754295735588547e-05, "train/grad": 0.0934448914295933, "train/loss": 0.8136572063827515, "eval/hcp-train-subset/loss": 0.8310015288091475, "eval/hcp-val/loss": 0.8284175319056357}
{"epoch": 35, "train/lr": 9.581282516416285e-05, "train/grad": 0.0954233883217063, "train/loss": 0.8100913866424561, "eval/hcp-train-subset/loss": 0.8287772747778124, "eval/hcp-val/loss": 0.8277841604525044}
{"epoch": 36, "train/lr": 9.404626588721676e-05, "train/grad": 0.09597049325368243, "train/loss": 0.8120141384315491, "eval/hcp-train-subset/loss": 0.8290778254309008, "eval/hcp-val/loss": 0.8273103727448371}
{"epoch": 37, "train/lr": 9.224521123168153e-05, "train/grad": 0.0982779750908463, "train/loss": 0.8087359265041352, "eval/hcp-train-subset/loss": 0.8285226610399061, "eval/hcp-val/loss": 0.8273025991455201}
{"epoch": 38, "train/lr": 9.041163062437843e-05, "train/grad": 0.10031336758647619, "train/loss": 0.807796542930603, "eval/hcp-train-subset/loss": 0.8294093858811163, "eval/hcp-val/loss": 0.8275299995176254}
{"epoch": 39, "train/lr": 8.85475290587822e-05, "train/grad": 0.103360305444105, "train/loss": 0.8079761584377289, "eval/hcp-train-subset/loss": 0.8295217589024575, "eval/hcp-val/loss": 0.8271862576084752}
{"epoch": 40, "train/lr": 8.665494490258622e-05, "train/grad": 0.10492372605472651, "train/loss": 0.8048704166984558, "eval/hcp-train-subset/loss": 0.8269047621757754, "eval/hcp-val/loss": 0.8263369481409749}
{"epoch": 41, "train/lr": 8.473594766877838e-05, "train/grad": 0.10749220097419011, "train/loss": 0.8062238374710083, "eval/hcp-train-subset/loss": 0.8255278602723153, "eval/hcp-val/loss": 0.8265633967614943}
{"epoch": 42, "train/lr": 8.279263575265999e-05, "train/grad": 0.11324688432697641, "train/loss": 0.8022113318920135, "eval/hcp-train-subset/loss": 0.8248415070195352, "eval/hcp-val/loss": 0.827015132673325}
{"epoch": 43, "train/lr": 8.082713413727944e-05, "train/grad": 0.11088490034674178, "train/loss": 0.8064741400432587, "eval/hcp-train-subset/loss": 0.8228814851853156, "eval/hcp-val/loss": 0.8264409313278813}
{"epoch": 44, "train/lr": 7.884159206979602e-05, "train/grad": 0.11573006558639667, "train/loss": 0.8020249214267731, "eval/hcp-train-subset/loss": 0.8263999598641549, "eval/hcp-val/loss": 0.8268819172536174}
{"epoch": 45, "train/lr": 7.683818071130916e-05, "train/grad": 0.11926622529336456, "train/loss": 0.8024194028568268, "eval/hcp-train-subset/loss": 0.8249861867197098, "eval/hcp-val/loss": 0.8258418854205839}
{"epoch": 46, "train/lr": 7.481909076272522e-05, "train/grad": 0.11918753805592962, "train/loss": 0.8036871037578582, "eval/hcp-train-subset/loss": 0.8250973791845383, "eval/hcp-val/loss": 0.8257330444551283}
{"epoch": 47, "train/lr": 7.278653006925963e-05, "train/grad": 0.1246748312892781, "train/loss": 0.8019527543354035, "eval/hcp-train-subset/loss": 0.8251922255562197, "eval/hcp-val/loss": 0.8257297469723609}
{"epoch": 48, "train/lr": 7.074272120618864e-05, "train/grad": 0.12584602071277348, "train/loss": 0.8003404767990112, "eval/hcp-train-subset/loss": 0.8236126053717828, "eval/hcp-val/loss": 0.8260439259390677}
{"epoch": 49, "train/lr": 6.868989904849677e-05, "train/grad": 0.12896928662331666, "train/loss": 0.8000991958808898, "eval/hcp-train-subset/loss": 0.8249483608430431, "eval/hcp-val/loss": 0.8257921886059546}
{"epoch": 50, "train/lr": 6.6630308327075e-05, "train/grad": 0.13460216425420074, "train/loss": 0.7983116283416748, "eval/hcp-train-subset/loss": 0.8222653019812799, "eval/hcp-val/loss": 0.8251217738274605}
{"epoch": 51, "train/lr": 6.456620117413798e-05, "train/grad": 0.13513809982591837, "train/loss": 0.7970426500606537, "eval/hcp-train-subset/loss": 0.8229044435485717, "eval/hcp-val/loss": 0.8262596389939708}
{"epoch": 52, "train/lr": 6.249983466055255e-05, "train/grad": 0.1424634059262272, "train/loss": 0.7961146374893189, "eval/hcp-train-subset/loss": 0.8203630322410215, "eval/hcp-val/loss": 0.8258090451840432}
{"epoch": 53, "train/lr": 6.0433468327763305e-05, "train/grad": 0.15051938551650998, "train/loss": 0.7933917880344391, "eval/hcp-train-subset/loss": 0.8210867374174057, "eval/hcp-val/loss": 0.8258443474769592}
{"epoch": 54, "train/lr": 5.83693617170174e-05, "train/grad": 0.151909756621342, "train/loss": 0.7960136598491668, "eval/hcp-train-subset/loss": 0.8213222276779913, "eval/hcp-val/loss": 0.8250894815691056}
{"epoch": 55, "train/lr": 5.6309771898588165e-05, "train/grad": 0.15336442530498726, "train/loss": 0.7950226197242737, "eval/hcp-train-subset/loss": 0.8192106475753169, "eval/hcp-val/loss": 0.8247672146366488}
{"epoch": 56, "train/lr": 5.4256951003704155e-05, "train/grad": 0.15318682584968218, "train/loss": 0.7934264154243469, "eval/hcp-train-subset/loss": 0.818328058527362, "eval/hcp-val/loss": 0.8245542318590225}
{"epoch": 57, "train/lr": 5.221314376187425e-05, "train/grad": 0.16300216784865312, "train/loss": 0.7939028549289704, "eval/hcp-train-subset/loss": 0.8204255450156427, "eval/hcp-val/loss": 0.8257395461682351}
{"epoch": 58, "train/lr": 5.018058504631059e-05, "train/grad": 0.16578855277615318, "train/loss": 0.7909653675460815, "eval/hcp-train-subset/loss": 0.8186006805589122, "eval/hcp-val/loss": 0.8247257730653209}
{"epoch": 59, "train/lr": 4.816149743012713e-05, "train/grad": 0.17363532784749086, "train/loss": 0.7900605303096772, "eval/hcp-train-subset/loss": 0.8174317921361616, "eval/hcp-val/loss": 0.8252084505173468}
{"epoch": 60, "train/lr": 4.615808875598772e-05, "train/grad": 0.1819083488605796, "train/loss": 0.7927576683044434, "eval/hcp-train-subset/loss": 0.8164630964879067, "eval/hcp-val/loss": 0.8248093483909484}
{"epoch": 61, "train/lr": 4.417254972186445e-05, "train/grad": 0.19451975683763947, "train/loss": 0.7920962606620788, "eval/hcp-train-subset/loss": 0.8169117704514535, "eval/hcp-val/loss": 0.8254074444693904}
{"epoch": 62, "train/lr": 4.220705148553925e-05, "train/grad": 0.20300685807439983, "train/loss": 0.7890161262226105, "eval/hcp-train-subset/loss": 0.8164615631103516, "eval/hcp-val/loss": 0.8248520262779728}
{"epoch": 63, "train/lr": 4.026374329047657e-05, "train/grad": 0.19362027160745823, "train/loss": 0.7899518791103363, "eval/hcp-train-subset/loss": 0.8149716229208054, "eval/hcp-val/loss": 0.8249270310325008}
{"epoch": 64, "train/lr": 3.834475011565652e-05, "train/grad": 0.19611726729397508, "train/loss": 0.7881520581817627, "eval/hcp-train-subset/loss": 0.8127094180353226, "eval/hcp-val/loss": 0.8253886334357723}
{"epoch": 65, "train/lr": 3.6452170351940815e-05, "train/grad": 0.21997092315744968, "train/loss": 0.7874352488517761, "eval/hcp-train-subset/loss": 0.8102303868339907, "eval/hcp-val/loss": 0.8247647343143341}
{"epoch": 66, "train/lr": 3.458807350751516e-05, "train/grad": 0.22860319950592808, "train/loss": 0.7863078270721435, "eval/hcp-train-subset/loss": 0.8119245085024065, "eval/hcp-val/loss": 0.8254036134289157}
{"epoch": 67, "train/lr": 3.2754497944910164e-05, "train/grad": 0.24118577227609453, "train/loss": 0.7832052378177643, "eval/hcp-train-subset/loss": 0.809495581734565, "eval/hcp-val/loss": 0.8244124708637115}
{"epoch": 68, "train/lr": 3.0953448652083367e-05, "train/grad": 0.24234819848627706, "train/loss": 0.7834366970348358, "eval/hcp-train-subset/loss": 0.8068633117983418, "eval/hcp-val/loss": 0.8246156496386374}
{"epoch": 69, "train/lr": 2.9186895049993948e-05, "train/grad": 0.26930420653997095, "train/loss": 0.7806034343719482, "eval/hcp-train-subset/loss": 0.807905868176491, "eval/hcp-val/loss": 0.824290765870002}
{"epoch": 70, "train/lr": 2.7456768839068717e-05, "train/grad": 0.2684079543109053, "train/loss": 0.7831319998836518, "eval/hcp-train-subset/loss": 0.8068511293780419, "eval/hcp-val/loss": 0.8254799314083592}
{"epoch": 71, "train/lr": 2.5764961886919063e-05, "train/grad": 0.2589876515905298, "train/loss": 0.783770079832077, "eval/hcp-train-subset/loss": 0.808048915478491, "eval/hcp-val/loss": 0.8244411849206493}
{"epoch": 72, "train/lr": 2.411332415960724e-05, "train/grad": 0.263863296995045, "train/loss": 0.7803970850467682, "eval/hcp-train-subset/loss": 0.8052082186745059, "eval/hcp-val/loss": 0.8238989095534047}
{"epoch": 73, "train/lr": 2.2503661698739544e-05, "train/grad": 0.28241766716631445, "train/loss": 0.7822235128307342, "eval/hcp-train-subset/loss": 0.8051473123411979, "eval/hcp-val/loss": 0.8245727064148072}
{"epoch": 74, "train/lr": 2.0937734646583902e-05, "train/grad": 0.30825398328371834, "train/loss": 0.7808462854194641, "eval/hcp-train-subset/loss": 0.80444973322653, "eval/hcp-val/loss": 0.82420824131658}
{"epoch": 75, "train/lr": 1.9417255321381202e-05, "train/grad": 0.30464580840080363, "train/loss": 0.7814406863880158, "eval/hcp-train-subset/loss": 0.8026347333385099, "eval/hcp-val/loss": 0.8247568405443623}
{"epoch": 76, "train/lr": 1.7943886344950134e-05, "train/grad": 0.31761662306376454, "train/loss": 0.7826786768341064, "eval/hcp-train-subset/loss": 0.8030145168304443, "eval/hcp-val/loss": 0.8243960257499449}
{"epoch": 77, "train/lr": 1.651923882463461e-05, "train/grad": 0.34062351526456397, "train/loss": 0.7810342831993103, "eval/hcp-train-subset/loss": 0.8010377095591638, "eval/hcp-val/loss": 0.8241924970380722}
{"epoch": 78, "train/lr": 1.5144870591581508e-05, "train/grad": 0.3280132667639932, "train/loss": 0.7812472697067261, "eval/hcp-train-subset/loss": 0.7999174508356279, "eval/hcp-val/loss": 0.8245073412695239}
{"epoch": 79, "train/lr": 1.3822284497275662e-05, "train/grad": 0.3232074997706217, "train/loss": 0.7795549619197846, "eval/hcp-train-subset/loss": 0.8004059310882322, "eval/hcp-val/loss": 0.8238097525412037}
{"epoch": 80, "train/lr": 1.2552926770192975e-05, "train/grad": 0.32587093426973435, "train/loss": 0.7818046122169494, "eval/hcp-train-subset/loss": 0.8008711636066437, "eval/hcp-val/loss": 0.8240477212013737}
{"epoch": 81, "train/lr": 1.1338185434371453e-05, "train/grad": 0.3522745265898389, "train/loss": 0.7804464736747742, "eval/hcp-train-subset/loss": 0.7979450735353655, "eval/hcp-val/loss": 0.823977803991687}
{"epoch": 82, "train/lr": 1.0179388791627326e-05, "train/grad": 0.3803795957925912, "train/loss": 0.7799539791297913, "eval/hcp-train-subset/loss": 0.7954702435001251, "eval/hcp-val/loss": 0.8234505470721952}
{"epoch": 83, "train/lr": 9.07780396907607e-06, "train/grad": 0.39530722448181604, "train/loss": 0.7822770584106445, "eval/hcp-train-subset/loss": 0.7953284382820129, "eval/hcp-val/loss": 0.8234098861294408}
{"epoch": 84, "train/lr": 8.034635533547902e-06, "train/grad": 0.3451475992789265, "train/loss": 0.7822869973373413, "eval/hcp-train-subset/loss": 0.7937996358640732, "eval/hcp-val/loss": 0.8231733268307101}
{"epoch": 85, "train/lr": 7.051024174411275e-06, "train/grad": 0.3632498056414357, "train/loss": 0.7818375374889374, "eval/hcp-train-subset/loss": 0.7947834835898492, "eval/hcp-val/loss": 0.8239543860958468}
{"epoch": 86, "train/lr": 6.1280454562463606e-06, "train/grad": 0.3593222790501449, "train/loss": 0.7837511468219757, "eval/hcp-train-subset/loss": 0.7925595218135465, "eval/hcp-val/loss": 0.824146036178835}
{"epoch": 87, "train/lr": 5.266708642730326e-06, "train/grad": 0.43385028629235817, "train/loss": 0.7812733251094818, "eval/hcp-train-subset/loss": 0.7913334811887434, "eval/hcp-val/loss": 0.8240087868705872}
{"epoch": 88, "train/lr": 4.467955593022733e-06, "train/grad": 0.38648715388940863, "train/loss": 0.782629829006195, "eval/hcp-train-subset/loss": 0.788804248456032, "eval/hcp-val/loss": 0.8230096024851645}
{"epoch": 89, "train/lr": 3.732659731856291e-06, "train/grad": 0.37332323315679045, "train/loss": 0.781412214717865, "eval/hcp-train-subset/loss": 0.7884892321402027, "eval/hcp-val/loss": 0.8231754216455645}
{"epoch": 90, "train/lr": 3.0616250944596583e-06, "train/grad": 0.40132161338398786, "train/loss": 0.7828904964828491, "eval/hcp-train-subset/loss": 0.7880712151527405, "eval/hcp-val/loss": 0.8233399295037792}
{"epoch": 91, "train/lr": 2.4555854473568305e-06, "train/grad": 0.4002110090419154, "train/loss": 0.782232611246109, "eval/hcp-train-subset/loss": 0.7868998387167531, "eval/hcp-val/loss": 0.8229319837785536}
{"epoch": 92, "train/lr": 1.915203486004091e-06, "train/grad": 0.39687150051248876, "train/loss": 0.781724165725708, "eval/hcp-train-subset/loss": 0.7856659389311268, "eval/hcp-val/loss": 0.8229434653635948}
{"epoch": 93, "train/lr": 1.4410701101423926e-06, "train/grad": 0.400784849252145, "train/loss": 0.7826387121582031, "eval/hcp-train-subset/loss": 0.7857185544506196, "eval/hcp-val/loss": 0.8223740054715064}
{"epoch": 94, "train/lr": 1.0337037776570775e-06, "train/grad": 0.4155452824112014, "train/loss": 0.7828479433345795, "eval/hcp-train-subset/loss": 0.784943405658968, "eval/hcp-val/loss": 0.8220854420815745}
{"epoch": 95, "train/lr": 6.935499376518293e-07, "train/grad": 0.4137927112635449, "train/loss": 0.7833084012603759, "eval/hcp-train-subset/loss": 0.7849180832985909, "eval/hcp-val/loss": 0.821809692728904}
{"epoch": 96, "train/lr": 4.209805433566085e-07, "train/grad": 0.41248341437271785, "train/loss": 0.7819348470401764, "eval/hcp-train-subset/loss": 0.7842149301882713, "eval/hcp-val/loss": 0.822366145349318}
{"epoch": 97, "train/lr": 2.1629364540224422e-07, "train/grad": 0.3897590123597614, "train/loss": 0.7863908851051331, "eval/hcp-train-subset/loss": 0.7837710015235408, "eval/hcp-val/loss": 0.8223887066687307}
{"epoch": 98, "train/lr": 7.971306590647406e-08, "train/grad": 0.398050315661425, "train/loss": 0.7869238539505005, "eval/hcp-train-subset/loss": 0.7838771795072863, "eval/hcp-val/loss": 0.8218101301500874}
{"epoch": 99, "train/lr": 1.1388153727718725e-08, "train/grad": 0.3563627934735432, "train/loss": 0.7884211170387269, "eval/hcp-train-subset/loss": 0.7836633488055198, "eval/hcp-val/loss": 0.8218959742976774}
|