| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.8744142455482662, |
| "eval_steps": 250, |
| "global_step": 4000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004686035613870665, |
| "grad_norm": 2.907787561416626, |
| "learning_rate": 9.997071227741332e-06, |
| "loss": 3.3815, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00937207122774133, |
| "grad_norm": 2.2910118103027344, |
| "learning_rate": 9.994142455482663e-06, |
| "loss": 3.3605, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.014058106841611996, |
| "grad_norm": 2.791727066040039, |
| "learning_rate": 9.991213683223994e-06, |
| "loss": 3.3338, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.01874414245548266, |
| "grad_norm": 2.881253242492676, |
| "learning_rate": 9.988284910965324e-06, |
| "loss": 3.3047, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.023430178069353328, |
| "grad_norm": 3.5495920181274414, |
| "learning_rate": 9.985356138706655e-06, |
| "loss": 3.266, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.028116213683223992, |
| "grad_norm": 3.8195812702178955, |
| "learning_rate": 9.982427366447985e-06, |
| "loss": 3.2116, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03280224929709466, |
| "grad_norm": 5.006792068481445, |
| "learning_rate": 9.979498594189316e-06, |
| "loss": 3.1271, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.03748828491096532, |
| "grad_norm": 5.206729412078857, |
| "learning_rate": 9.976569821930647e-06, |
| "loss": 3.0472, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.04217432052483599, |
| "grad_norm": 6.317724227905273, |
| "learning_rate": 9.973641049671978e-06, |
| "loss": 2.9458, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.046860356138706656, |
| "grad_norm": 7.30826997756958, |
| "learning_rate": 9.97071227741331e-06, |
| "loss": 2.9002, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05154639175257732, |
| "grad_norm": 7.05161190032959, |
| "learning_rate": 9.96778350515464e-06, |
| "loss": 2.8379, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.056232427366447985, |
| "grad_norm": 12.389013290405273, |
| "learning_rate": 9.964854732895972e-06, |
| "loss": 2.7637, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.06091846298031865, |
| "grad_norm": 19.661762237548828, |
| "learning_rate": 9.961925960637301e-06, |
| "loss": 2.7413, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.06560449859418932, |
| "grad_norm": 7.9712018966674805, |
| "learning_rate": 9.958997188378632e-06, |
| "loss": 2.6953, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.07029053420805999, |
| "grad_norm": 44.79791259765625, |
| "learning_rate": 9.956068416119962e-06, |
| "loss": 2.6795, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.07497656982193064, |
| "grad_norm": 7.748485565185547, |
| "learning_rate": 9.953139643861293e-06, |
| "loss": 2.6179, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.07966260543580131, |
| "grad_norm": 7.135361194610596, |
| "learning_rate": 9.950210871602624e-06, |
| "loss": 2.5714, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.08434864104967198, |
| "grad_norm": 5.464244365692139, |
| "learning_rate": 9.947282099343956e-06, |
| "loss": 2.4817, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.08903467666354264, |
| "grad_norm": 10.304727554321289, |
| "learning_rate": 9.944353327085287e-06, |
| "loss": 2.3939, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.09372071227741331, |
| "grad_norm": 8.390380859375, |
| "learning_rate": 9.941424554826618e-06, |
| "loss": 2.3162, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09840674789128398, |
| "grad_norm": 7.206277847290039, |
| "learning_rate": 9.938495782567949e-06, |
| "loss": 2.2413, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.10309278350515463, |
| "grad_norm": 10.72529411315918, |
| "learning_rate": 9.935567010309279e-06, |
| "loss": 2.1816, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1077788191190253, |
| "grad_norm": 8.411327362060547, |
| "learning_rate": 9.93263823805061e-06, |
| "loss": 2.0204, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.11246485473289597, |
| "grad_norm": 9.118602752685547, |
| "learning_rate": 9.929709465791941e-06, |
| "loss": 1.9329, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.11715089034676664, |
| "grad_norm": 11.883502960205078, |
| "learning_rate": 9.92678069353327e-06, |
| "loss": 1.8041, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.11715089034676664, |
| "eval_loss": 0.20095524191856384, |
| "eval_pearson_cosine": 0.5629603652959432, |
| "eval_pearson_dot": 0.32442021258601983, |
| "eval_pearson_euclidean": 0.5948642130310873, |
| "eval_pearson_manhattan": 0.5931866084570743, |
| "eval_runtime": 46.3498, |
| "eval_samples_per_second": 32.363, |
| "eval_spearman_cosine": 0.5645428688364399, |
| "eval_spearman_dot": 0.3123519595505677, |
| "eval_spearman_euclidean": 0.5966715855304487, |
| "eval_spearman_manhattan": 0.5951499296436052, |
| "eval_steps_per_second": 32.363, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.1218369259606373, |
| "grad_norm": 9.455839157104492, |
| "learning_rate": 9.923851921274602e-06, |
| "loss": 1.7175, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.12652296157450796, |
| "grad_norm": 9.907763481140137, |
| "learning_rate": 9.920923149015933e-06, |
| "loss": 1.5752, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.13120899718837864, |
| "grad_norm": 10.268372535705566, |
| "learning_rate": 9.917994376757264e-06, |
| "loss": 1.5905, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.1358950328022493, |
| "grad_norm": 12.264440536499023, |
| "learning_rate": 9.915065604498595e-06, |
| "loss": 1.4994, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.14058106841611998, |
| "grad_norm": 10.21927547454834, |
| "learning_rate": 9.912136832239926e-06, |
| "loss": 1.4741, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.14526710402999063, |
| "grad_norm": 12.204063415527344, |
| "learning_rate": 9.909208059981256e-06, |
| "loss": 1.3685, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.14995313964386128, |
| "grad_norm": 8.701486587524414, |
| "learning_rate": 9.906279287722587e-06, |
| "loss": 1.3407, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.15463917525773196, |
| "grad_norm": 11.478012084960938, |
| "learning_rate": 9.903350515463918e-06, |
| "loss": 1.3996, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.15932521087160262, |
| "grad_norm": 8.862137794494629, |
| "learning_rate": 9.90042174320525e-06, |
| "loss": 1.2921, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1640112464854733, |
| "grad_norm": 8.181413650512695, |
| "learning_rate": 9.897492970946579e-06, |
| "loss": 1.2948, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.16869728209934395, |
| "grad_norm": 12.891910552978516, |
| "learning_rate": 9.89456419868791e-06, |
| "loss": 1.2444, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1733833177132146, |
| "grad_norm": 9.783638000488281, |
| "learning_rate": 9.891635426429241e-06, |
| "loss": 1.1765, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1780693533270853, |
| "grad_norm": 10.521812438964844, |
| "learning_rate": 9.888706654170573e-06, |
| "loss": 1.2163, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.18275538894095594, |
| "grad_norm": 9.507091522216797, |
| "learning_rate": 9.885777881911904e-06, |
| "loss": 1.1555, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.18744142455482662, |
| "grad_norm": 10.072102546691895, |
| "learning_rate": 9.882849109653235e-06, |
| "loss": 1.1631, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.19212746016869728, |
| "grad_norm": 12.557927131652832, |
| "learning_rate": 9.879920337394564e-06, |
| "loss": 1.1319, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.19681349578256796, |
| "grad_norm": 7.743768692016602, |
| "learning_rate": 9.876991565135896e-06, |
| "loss": 1.2022, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.2014995313964386, |
| "grad_norm": 9.258079528808594, |
| "learning_rate": 9.874062792877227e-06, |
| "loss": 1.1219, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.20618556701030927, |
| "grad_norm": 8.362629890441895, |
| "learning_rate": 9.871134020618558e-06, |
| "loss": 1.1138, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.21087160262417995, |
| "grad_norm": 8.71789264678955, |
| "learning_rate": 9.868205248359888e-06, |
| "loss": 1.0473, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2155576382380506, |
| "grad_norm": 8.710640907287598, |
| "learning_rate": 9.865276476101219e-06, |
| "loss": 1.0933, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.22024367385192128, |
| "grad_norm": 7.57949686050415, |
| "learning_rate": 9.86234770384255e-06, |
| "loss": 1.0429, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.22492970946579194, |
| "grad_norm": 8.775091171264648, |
| "learning_rate": 9.859418931583881e-06, |
| "loss": 1.0406, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2296157450796626, |
| "grad_norm": 9.942752838134766, |
| "learning_rate": 9.856490159325212e-06, |
| "loss": 1.0526, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.23430178069353327, |
| "grad_norm": 10.166437149047852, |
| "learning_rate": 9.853561387066542e-06, |
| "loss": 1.0265, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.23430178069353327, |
| "eval_loss": 0.09848710149526596, |
| "eval_pearson_cosine": 0.7114527090607083, |
| "eval_pearson_dot": 0.5814656567702485, |
| "eval_pearson_euclidean": 0.7022168021213133, |
| "eval_pearson_manhattan": 0.7010309676073874, |
| "eval_runtime": 48.356, |
| "eval_samples_per_second": 31.02, |
| "eval_spearman_cosine": 0.7098203386273151, |
| "eval_spearman_dot": 0.5861254786395066, |
| "eval_spearman_euclidean": 0.7102590115372712, |
| "eval_spearman_manhattan": 0.7094011853041999, |
| "eval_steps_per_second": 31.02, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.23898781630740393, |
| "grad_norm": 6.910321235656738, |
| "learning_rate": 9.850632614807873e-06, |
| "loss": 1.0267, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.2436738519212746, |
| "grad_norm": 8.010503768920898, |
| "learning_rate": 9.847703842549204e-06, |
| "loss": 0.97, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.24835988753514526, |
| "grad_norm": 8.340336799621582, |
| "learning_rate": 9.844775070290535e-06, |
| "loss": 0.9773, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.2530459231490159, |
| "grad_norm": 6.75998592376709, |
| "learning_rate": 9.841846298031867e-06, |
| "loss": 0.9694, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.25773195876288657, |
| "grad_norm": 6.592973709106445, |
| "learning_rate": 9.838917525773196e-06, |
| "loss": 0.9101, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.2624179943767573, |
| "grad_norm": 8.13701343536377, |
| "learning_rate": 9.835988753514527e-06, |
| "loss": 0.9693, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.26710402999062793, |
| "grad_norm": 10.256951332092285, |
| "learning_rate": 9.833059981255859e-06, |
| "loss": 0.9405, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.2717900656044986, |
| "grad_norm": 9.521321296691895, |
| "learning_rate": 9.83013120899719e-06, |
| "loss": 0.8731, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.27647610121836924, |
| "grad_norm": 7.164852142333984, |
| "learning_rate": 9.82720243673852e-06, |
| "loss": 0.9387, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.28116213683223995, |
| "grad_norm": 8.326433181762695, |
| "learning_rate": 9.82427366447985e-06, |
| "loss": 0.8388, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2858481724461106, |
| "grad_norm": 8.819974899291992, |
| "learning_rate": 9.821344892221182e-06, |
| "loss": 0.9034, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.29053420805998126, |
| "grad_norm": 6.0674052238464355, |
| "learning_rate": 9.818416119962513e-06, |
| "loss": 0.8225, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2952202436738519, |
| "grad_norm": 7.898690223693848, |
| "learning_rate": 9.815487347703844e-06, |
| "loss": 0.8916, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.29990627928772257, |
| "grad_norm": 9.459305763244629, |
| "learning_rate": 9.812558575445175e-06, |
| "loss": 0.8771, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3045923149015933, |
| "grad_norm": 7.231110095977783, |
| "learning_rate": 9.809629803186505e-06, |
| "loss": 0.8575, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.30927835051546393, |
| "grad_norm": 5.850890159606934, |
| "learning_rate": 9.806701030927836e-06, |
| "loss": 0.8294, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.3139643861293346, |
| "grad_norm": 12.532159805297852, |
| "learning_rate": 9.803772258669167e-06, |
| "loss": 0.8745, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.31865042174320524, |
| "grad_norm": 6.576635837554932, |
| "learning_rate": 9.800843486410497e-06, |
| "loss": 0.8167, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.3233364573570759, |
| "grad_norm": 7.243174076080322, |
| "learning_rate": 9.797914714151828e-06, |
| "loss": 0.8886, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3280224929709466, |
| "grad_norm": 6.775111675262451, |
| "learning_rate": 9.794985941893159e-06, |
| "loss": 0.8205, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.33270852858481725, |
| "grad_norm": 7.494016647338867, |
| "learning_rate": 9.79205716963449e-06, |
| "loss": 0.7778, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.3373945641986879, |
| "grad_norm": 5.593213081359863, |
| "learning_rate": 9.789128397375821e-06, |
| "loss": 0.7875, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.34208059981255856, |
| "grad_norm": 7.325387001037598, |
| "learning_rate": 9.786199625117153e-06, |
| "loss": 0.7839, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.3467666354264292, |
| "grad_norm": 5.411241054534912, |
| "learning_rate": 9.783270852858484e-06, |
| "loss": 0.8363, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.3514526710402999, |
| "grad_norm": 5.667125225067139, |
| "learning_rate": 9.780342080599813e-06, |
| "loss": 0.7904, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3514526710402999, |
| "eval_loss": 0.07609602808952332, |
| "eval_pearson_cosine": 0.7390127527190131, |
| "eval_pearson_dot": 0.6193519334256266, |
| "eval_pearson_euclidean": 0.7286540107637123, |
| "eval_pearson_manhattan": 0.7280163166143723, |
| "eval_runtime": 48.6286, |
| "eval_samples_per_second": 30.846, |
| "eval_spearman_cosine": 0.7392385981828663, |
| "eval_spearman_dot": 0.6275059521836013, |
| "eval_spearman_euclidean": 0.7379755721813188, |
| "eval_spearman_manhattan": 0.7372480627669395, |
| "eval_steps_per_second": 30.846, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3561387066541706, |
| "grad_norm": 5.931227207183838, |
| "learning_rate": 9.777413308341144e-06, |
| "loss": 0.7801, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.36082474226804123, |
| "grad_norm": 5.550874710083008, |
| "learning_rate": 9.774484536082474e-06, |
| "loss": 0.7466, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3655107778819119, |
| "grad_norm": 5.67214298248291, |
| "learning_rate": 9.771555763823805e-06, |
| "loss": 0.7561, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.3701968134957826, |
| "grad_norm": 5.121714115142822, |
| "learning_rate": 9.768626991565136e-06, |
| "loss": 0.7395, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.37488284910965325, |
| "grad_norm": 4.957924842834473, |
| "learning_rate": 9.765698219306467e-06, |
| "loss": 0.7368, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3795688847235239, |
| "grad_norm": 6.30219030380249, |
| "learning_rate": 9.762769447047799e-06, |
| "loss": 0.8091, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.38425492033739456, |
| "grad_norm": 6.518470287322998, |
| "learning_rate": 9.75984067478913e-06, |
| "loss": 0.7525, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.3889409559512652, |
| "grad_norm": 6.101437568664551, |
| "learning_rate": 9.756911902530461e-06, |
| "loss": 0.7263, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.3936269915651359, |
| "grad_norm": 5.428840160369873, |
| "learning_rate": 9.75398313027179e-06, |
| "loss": 0.7881, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.3983130271790066, |
| "grad_norm": 7.170475482940674, |
| "learning_rate": 9.751054358013122e-06, |
| "loss": 0.7218, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.4029990627928772, |
| "grad_norm": 6.153990745544434, |
| "learning_rate": 9.748125585754453e-06, |
| "loss": 0.748, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.4076850984067479, |
| "grad_norm": 5.364086151123047, |
| "learning_rate": 9.745196813495782e-06, |
| "loss": 0.786, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.41237113402061853, |
| "grad_norm": 5.541423797607422, |
| "learning_rate": 9.742268041237114e-06, |
| "loss": 0.7427, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.41705716963448924, |
| "grad_norm": 5.1667022705078125, |
| "learning_rate": 9.739339268978445e-06, |
| "loss": 0.6918, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.4217432052483599, |
| "grad_norm": 4.839612007141113, |
| "learning_rate": 9.736410496719776e-06, |
| "loss": 0.7056, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.42642924086223055, |
| "grad_norm": 4.407963275909424, |
| "learning_rate": 9.733481724461107e-06, |
| "loss": 0.6313, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.4311152764761012, |
| "grad_norm": 7.052595138549805, |
| "learning_rate": 9.730552952202438e-06, |
| "loss": 0.7489, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.43580131208997186, |
| "grad_norm": 5.71290397644043, |
| "learning_rate": 9.727624179943768e-06, |
| "loss": 0.6578, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.44048734770384257, |
| "grad_norm": 6.3575825691223145, |
| "learning_rate": 9.724695407685099e-06, |
| "loss": 0.6914, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.4451733833177132, |
| "grad_norm": 5.223476886749268, |
| "learning_rate": 9.72176663542643e-06, |
| "loss": 0.6494, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.4498594189315839, |
| "grad_norm": 6.220378398895264, |
| "learning_rate": 9.71883786316776e-06, |
| "loss": 0.6996, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.45454545454545453, |
| "grad_norm": 6.475409507751465, |
| "learning_rate": 9.715909090909091e-06, |
| "loss": 0.721, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.4592314901593252, |
| "grad_norm": 5.10095739364624, |
| "learning_rate": 9.712980318650422e-06, |
| "loss": 0.6734, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.4639175257731959, |
| "grad_norm": 7.8438801765441895, |
| "learning_rate": 9.710051546391753e-06, |
| "loss": 0.7409, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.46860356138706655, |
| "grad_norm": 5.446135997772217, |
| "learning_rate": 9.707122774133085e-06, |
| "loss": 0.6772, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.46860356138706655, |
| "eval_loss": 0.06938865035772324, |
| "eval_pearson_cosine": 0.7523242546763527, |
| "eval_pearson_dot": 0.6339033623348058, |
| "eval_pearson_euclidean": 0.7449881727323344, |
| "eval_pearson_manhattan": 0.7443626147120028, |
| "eval_runtime": 47.885, |
| "eval_samples_per_second": 31.325, |
| "eval_spearman_cosine": 0.7542578168613095, |
| "eval_spearman_dot": 0.6408093688850417, |
| "eval_spearman_euclidean": 0.7532432307302356, |
| "eval_spearman_manhattan": 0.7526380381288565, |
| "eval_steps_per_second": 31.325, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4732895970009372, |
| "grad_norm": 6.391997814178467, |
| "learning_rate": 9.704194001874416e-06, |
| "loss": 0.6965, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.47797563261480785, |
| "grad_norm": 5.345996379852295, |
| "learning_rate": 9.701265229615747e-06, |
| "loss": 0.6447, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.48266166822867856, |
| "grad_norm": 5.60822057723999, |
| "learning_rate": 9.698336457357076e-06, |
| "loss": 0.6854, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.4873477038425492, |
| "grad_norm": 6.488014221191406, |
| "learning_rate": 9.695407685098408e-06, |
| "loss": 0.7089, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.49203373945641987, |
| "grad_norm": 5.387355804443359, |
| "learning_rate": 9.692478912839737e-06, |
| "loss": 0.6949, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4967197750702905, |
| "grad_norm": 5.179281234741211, |
| "learning_rate": 9.689550140581068e-06, |
| "loss": 0.6571, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.5014058106841612, |
| "grad_norm": 5.786458492279053, |
| "learning_rate": 9.6866213683224e-06, |
| "loss": 0.7154, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.5060918462980318, |
| "grad_norm": 6.279985427856445, |
| "learning_rate": 9.68369259606373e-06, |
| "loss": 0.6757, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.5107778819119025, |
| "grad_norm": 4.793182849884033, |
| "learning_rate": 9.680763823805062e-06, |
| "loss": 0.7136, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.5154639175257731, |
| "grad_norm": 7.646529674530029, |
| "learning_rate": 9.677835051546393e-06, |
| "loss": 0.6396, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.5201499531396439, |
| "grad_norm": 5.7034912109375, |
| "learning_rate": 9.674906279287724e-06, |
| "loss": 0.665, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.5248359887535146, |
| "grad_norm": 6.54317045211792, |
| "learning_rate": 9.671977507029054e-06, |
| "loss": 0.6713, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.5295220243673852, |
| "grad_norm": 5.6496806144714355, |
| "learning_rate": 9.669048734770385e-06, |
| "loss": 0.6876, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.5342080599812559, |
| "grad_norm": 5.326486110687256, |
| "learning_rate": 9.666119962511716e-06, |
| "loss": 0.6951, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.5388940955951266, |
| "grad_norm": 5.124545574188232, |
| "learning_rate": 9.663191190253046e-06, |
| "loss": 0.6388, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.5435801312089972, |
| "grad_norm": 4.34152364730835, |
| "learning_rate": 9.660262417994377e-06, |
| "loss": 0.6322, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.5482661668228679, |
| "grad_norm": 8.722075462341309, |
| "learning_rate": 9.657333645735708e-06, |
| "loss": 0.6776, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.5529522024367385, |
| "grad_norm": 5.417623996734619, |
| "learning_rate": 9.65440487347704e-06, |
| "loss": 0.6492, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.5576382380506092, |
| "grad_norm": 4.369041919708252, |
| "learning_rate": 9.65147610121837e-06, |
| "loss": 0.6039, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.5623242736644799, |
| "grad_norm": 6.5720062255859375, |
| "learning_rate": 9.648547328959702e-06, |
| "loss": 0.6911, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5670103092783505, |
| "grad_norm": 7.112950325012207, |
| "learning_rate": 9.645618556701031e-06, |
| "loss": 0.6214, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.5716963448922212, |
| "grad_norm": 5.643182277679443, |
| "learning_rate": 9.642689784442362e-06, |
| "loss": 0.6959, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.5763823805060918, |
| "grad_norm": 5.078190803527832, |
| "learning_rate": 9.639761012183694e-06, |
| "loss": 0.6633, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.5810684161199625, |
| "grad_norm": 5.247280120849609, |
| "learning_rate": 9.636832239925025e-06, |
| "loss": 0.6415, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.5857544517338332, |
| "grad_norm": 5.110747814178467, |
| "learning_rate": 9.633903467666354e-06, |
| "loss": 0.6031, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.5857544517338332, |
| "eval_loss": 0.06345358490943909, |
| "eval_pearson_cosine": 0.7580338914962539, |
| "eval_pearson_dot": 0.6394158052533783, |
| "eval_pearson_euclidean": 0.7521759780114508, |
| "eval_pearson_manhattan": 0.7513571158009427, |
| "eval_runtime": 44.2242, |
| "eval_samples_per_second": 33.918, |
| "eval_spearman_cosine": 0.758882658229917, |
| "eval_spearman_dot": 0.6455380162932587, |
| "eval_spearman_euclidean": 0.7604619351541958, |
| "eval_spearman_manhattan": 0.7599139087493931, |
| "eval_steps_per_second": 33.918, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.5904404873477038, |
| "grad_norm": 6.717201232910156, |
| "learning_rate": 9.630974695407685e-06, |
| "loss": 0.6553, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.5951265229615745, |
| "grad_norm": 6.948915004730225, |
| "learning_rate": 9.628045923149017e-06, |
| "loss": 0.6528, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.5998125585754451, |
| "grad_norm": 5.585124969482422, |
| "learning_rate": 9.625117150890348e-06, |
| "loss": 0.6125, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.6044985941893158, |
| "grad_norm": 4.020166397094727, |
| "learning_rate": 9.622188378631679e-06, |
| "loss": 0.5857, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.6091846298031866, |
| "grad_norm": 4.905421257019043, |
| "learning_rate": 9.619259606373008e-06, |
| "loss": 0.6128, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.6138706654170571, |
| "grad_norm": 5.642446517944336, |
| "learning_rate": 9.61633083411434e-06, |
| "loss": 0.6177, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.6185567010309279, |
| "grad_norm": 5.623671531677246, |
| "learning_rate": 9.613402061855671e-06, |
| "loss": 0.6076, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.6232427366447985, |
| "grad_norm": 3.6249349117279053, |
| "learning_rate": 9.610473289597002e-06, |
| "loss": 0.5987, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.6279287722586692, |
| "grad_norm": 4.7242608070373535, |
| "learning_rate": 9.607544517338333e-06, |
| "loss": 0.6082, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.6326148078725399, |
| "grad_norm": 9.071741104125977, |
| "learning_rate": 9.604615745079663e-06, |
| "loss": 0.6369, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.6373008434864105, |
| "grad_norm": 5.471718788146973, |
| "learning_rate": 9.601686972820994e-06, |
| "loss": 0.6235, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.6419868791002812, |
| "grad_norm": 6.0755934715271, |
| "learning_rate": 9.598758200562325e-06, |
| "loss": 0.6197, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.6466729147141518, |
| "grad_norm": 5.650800704956055, |
| "learning_rate": 9.595829428303656e-06, |
| "loss": 0.5947, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.6513589503280225, |
| "grad_norm": 4.409568786621094, |
| "learning_rate": 9.592900656044986e-06, |
| "loss": 0.6632, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.6560449859418932, |
| "grad_norm": 6.575608730316162, |
| "learning_rate": 9.589971883786317e-06, |
| "loss": 0.5655, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6607310215557638, |
| "grad_norm": 4.897518634796143, |
| "learning_rate": 9.587043111527648e-06, |
| "loss": 0.6064, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.6654170571696345, |
| "grad_norm": 4.505845546722412, |
| "learning_rate": 9.58411433926898e-06, |
| "loss": 0.6217, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.6701030927835051, |
| "grad_norm": 11.04179573059082, |
| "learning_rate": 9.58118556701031e-06, |
| "loss": 0.626, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.6747891283973758, |
| "grad_norm": 7.031481742858887, |
| "learning_rate": 9.578256794751642e-06, |
| "loss": 0.6644, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.6794751640112465, |
| "grad_norm": 5.177082061767578, |
| "learning_rate": 9.575328022492971e-06, |
| "loss": 0.5794, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.6841611996251171, |
| "grad_norm": 5.830789566040039, |
| "learning_rate": 9.572399250234303e-06, |
| "loss": 0.5962, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.6888472352389878, |
| "grad_norm": 5.322279453277588, |
| "learning_rate": 9.569470477975634e-06, |
| "loss": 0.5528, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.6935332708528584, |
| "grad_norm": 5.191045761108398, |
| "learning_rate": 9.566541705716965e-06, |
| "loss": 0.602, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.6982193064667291, |
| "grad_norm": 4.832320213317871, |
| "learning_rate": 9.563612933458294e-06, |
| "loss": 0.5732, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.7029053420805998, |
| "grad_norm": 5.9457926750183105, |
| "learning_rate": 9.560684161199626e-06, |
| "loss": 0.6017, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.7029053420805998, |
| "eval_loss": 0.059113115072250366, |
| "eval_pearson_cosine": 0.7675747794888963, |
| "eval_pearson_dot": 0.6475892776570333, |
| "eval_pearson_euclidean": 0.7594640382486553, |
| "eval_pearson_manhattan": 0.7585029707701096, |
| "eval_runtime": 45.7613, |
| "eval_samples_per_second": 32.779, |
| "eval_spearman_cosine": 0.768339335776319, |
| "eval_spearman_dot": 0.655445685087582, |
| "eval_spearman_euclidean": 0.7680811238488432, |
| "eval_spearman_manhattan": 0.7673055147561156, |
| "eval_steps_per_second": 32.779, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.7075913776944704, |
| "grad_norm": 4.822035789489746, |
| "learning_rate": 9.557755388940957e-06, |
| "loss": 0.5891, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.7122774133083412, |
| "grad_norm": 7.0355753898620605, |
| "learning_rate": 9.554826616682288e-06, |
| "loss": 0.6019, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.7169634489222118, |
| "grad_norm": 7.064100742340088, |
| "learning_rate": 9.55189784442362e-06, |
| "loss": 0.5656, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.7216494845360825, |
| "grad_norm": 4.629329204559326, |
| "learning_rate": 9.54896907216495e-06, |
| "loss": 0.5839, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.7263355201499532, |
| "grad_norm": 5.421347141265869, |
| "learning_rate": 9.54604029990628e-06, |
| "loss": 0.5684, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.7310215557638238, |
| "grad_norm": 4.520521640777588, |
| "learning_rate": 9.543111527647611e-06, |
| "loss": 0.5979, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.7357075913776945, |
| "grad_norm": 5.172377109527588, |
| "learning_rate": 9.540182755388942e-06, |
| "loss": 0.5678, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.7403936269915652, |
| "grad_norm": 5.090722560882568, |
| "learning_rate": 9.537253983130272e-06, |
| "loss": 0.556, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.7450796626054358, |
| "grad_norm": 4.6714887619018555, |
| "learning_rate": 9.534325210871603e-06, |
| "loss": 0.564, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.7497656982193065, |
| "grad_norm": 4.211735248565674, |
| "learning_rate": 9.531396438612934e-06, |
| "loss": 0.617, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.7544517338331771, |
| "grad_norm": 4.693171501159668, |
| "learning_rate": 9.528467666354265e-06, |
| "loss": 0.5657, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.7591377694470478, |
| "grad_norm": 6.890966892242432, |
| "learning_rate": 9.525538894095597e-06, |
| "loss": 0.5838, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.7638238050609185, |
| "grad_norm": 3.5127806663513184, |
| "learning_rate": 9.522610121836928e-06, |
| "loss": 0.5669, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.7685098406747891, |
| "grad_norm": 4.389316082000732, |
| "learning_rate": 9.519681349578259e-06, |
| "loss": 0.5669, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.7731958762886598, |
| "grad_norm": 4.59335470199585, |
| "learning_rate": 9.516752577319588e-06, |
| "loss": 0.604, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.7778819119025304, |
| "grad_norm": 5.345147132873535, |
| "learning_rate": 9.51382380506092e-06, |
| "loss": 0.6132, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.7825679475164011, |
| "grad_norm": 5.133398532867432, |
| "learning_rate": 9.510895032802249e-06, |
| "loss": 0.5539, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.7872539831302718, |
| "grad_norm": 7.907310962677002, |
| "learning_rate": 9.50796626054358e-06, |
| "loss": 0.61, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.7919400187441424, |
| "grad_norm": 4.504448890686035, |
| "learning_rate": 9.505037488284911e-06, |
| "loss": 0.5851, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.7966260543580131, |
| "grad_norm": 4.3662028312683105, |
| "learning_rate": 9.502108716026243e-06, |
| "loss": 0.5915, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.8013120899718837, |
| "grad_norm": 5.221836566925049, |
| "learning_rate": 9.499179943767574e-06, |
| "loss": 0.581, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.8059981255857545, |
| "grad_norm": 6.357667446136475, |
| "learning_rate": 9.496251171508905e-06, |
| "loss": 0.5937, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.8106841611996252, |
| "grad_norm": 6.262212753295898, |
| "learning_rate": 9.493322399250236e-06, |
| "loss": 0.606, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.8153701968134958, |
| "grad_norm": 4.363849639892578, |
| "learning_rate": 9.490393626991566e-06, |
| "loss": 0.5524, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.8200562324273665, |
| "grad_norm": 5.514476299285889, |
| "learning_rate": 9.487464854732897e-06, |
| "loss": 0.5611, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.8200562324273665, |
| "eval_loss": 0.05879165977239609, |
| "eval_pearson_cosine": 0.7714099892705395, |
| "eval_pearson_dot": 0.6462212772089089, |
| "eval_pearson_euclidean": 0.7641084348061273, |
| "eval_pearson_manhattan": 0.7629885828620147, |
| "eval_runtime": 43.6421, |
| "eval_samples_per_second": 34.37, |
| "eval_spearman_cosine": 0.7720168259371313, |
| "eval_spearman_dot": 0.6536245076677092, |
| "eval_spearman_euclidean": 0.7726348092699838, |
| "eval_spearman_manhattan": 0.7716062900578692, |
| "eval_steps_per_second": 34.37, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.8247422680412371, |
| "grad_norm": 6.260695457458496, |
| "learning_rate": 9.484536082474226e-06, |
| "loss": 0.5566, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.8294283036551078, |
| "grad_norm": 4.187561511993408, |
| "learning_rate": 9.481607310215558e-06, |
| "loss": 0.5077, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.8341143392689785, |
| "grad_norm": 4.611522197723389, |
| "learning_rate": 9.478678537956889e-06, |
| "loss": 0.5449, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.8388003748828491, |
| "grad_norm": 12.466484069824219, |
| "learning_rate": 9.47574976569822e-06, |
| "loss": 0.5744, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.8434864104967198, |
| "grad_norm": 4.683777332305908, |
| "learning_rate": 9.472820993439551e-06, |
| "loss": 0.5102, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.8481724461105904, |
| "grad_norm": 5.541889190673828, |
| "learning_rate": 9.469892221180882e-06, |
| "loss": 0.5589, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.8528584817244611, |
| "grad_norm": 8.524742126464844, |
| "learning_rate": 9.466963448922214e-06, |
| "loss": 0.5872, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.8575445173383318, |
| "grad_norm": 7.117620944976807, |
| "learning_rate": 9.464034676663543e-06, |
| "loss": 0.5484, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.8622305529522024, |
| "grad_norm": 5.3457841873168945, |
| "learning_rate": 9.461105904404874e-06, |
| "loss": 0.5624, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.8669165885660731, |
| "grad_norm": 4.375561714172363, |
| "learning_rate": 9.458177132146204e-06, |
| "loss": 0.525, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.8716026241799437, |
| "grad_norm": 4.6026082038879395, |
| "learning_rate": 9.455248359887535e-06, |
| "loss": 0.5855, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.8762886597938144, |
| "grad_norm": 5.399001121520996, |
| "learning_rate": 9.452319587628866e-06, |
| "loss": 0.5775, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.8809746954076851, |
| "grad_norm": 3.9378573894500732, |
| "learning_rate": 9.449390815370197e-06, |
| "loss": 0.5068, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.8856607310215557, |
| "grad_norm": 5.515146255493164, |
| "learning_rate": 9.446462043111529e-06, |
| "loss": 0.5718, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.8903467666354264, |
| "grad_norm": 4.8671345710754395, |
| "learning_rate": 9.44353327085286e-06, |
| "loss": 0.5552, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.895032802249297, |
| "grad_norm": 5.388006210327148, |
| "learning_rate": 9.440604498594191e-06, |
| "loss": 0.5854, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.8997188378631678, |
| "grad_norm": 6.608395099639893, |
| "learning_rate": 9.43767572633552e-06, |
| "loss": 0.5459, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.9044048734770385, |
| "grad_norm": 4.6435160636901855, |
| "learning_rate": 9.434746954076852e-06, |
| "loss": 0.529, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 4.642300605773926, |
| "learning_rate": 9.431818181818183e-06, |
| "loss": 0.5255, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.9137769447047798, |
| "grad_norm": 5.40919828414917, |
| "learning_rate": 9.428889409559512e-06, |
| "loss": 0.5605, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.9184629803186504, |
| "grad_norm": 4.9874467849731445, |
| "learning_rate": 9.425960637300844e-06, |
| "loss": 0.5798, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.9231490159325211, |
| "grad_norm": 4.9304094314575195, |
| "learning_rate": 9.423031865042175e-06, |
| "loss": 0.5576, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.9278350515463918, |
| "grad_norm": 5.080467224121094, |
| "learning_rate": 9.420103092783506e-06, |
| "loss": 0.5221, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.9325210871602624, |
| "grad_norm": 5.083141326904297, |
| "learning_rate": 9.417174320524837e-06, |
| "loss": 0.6041, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.9372071227741331, |
| "grad_norm": 3.8194010257720947, |
| "learning_rate": 9.414245548266168e-06, |
| "loss": 0.5439, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.9372071227741331, |
| "eval_loss": 0.058015577495098114, |
| "eval_pearson_cosine": 0.7772706274362164, |
| "eval_pearson_dot": 0.6518150260238968, |
| "eval_pearson_euclidean": 0.7681856098914253, |
| "eval_pearson_manhattan": 0.7668726914631314, |
| "eval_runtime": 45.6952, |
| "eval_samples_per_second": 32.826, |
| "eval_spearman_cosine": 0.7781983730395821, |
| "eval_spearman_dot": 0.6578238148510893, |
| "eval_spearman_euclidean": 0.7779674226973379, |
| "eval_spearman_manhattan": 0.7766391726420421, |
| "eval_steps_per_second": 32.826, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.9418931583880038, |
| "grad_norm": 5.383081912994385, |
| "learning_rate": 9.411316776007498e-06, |
| "loss": 0.5343, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.9465791940018744, |
| "grad_norm": 5.533719539642334, |
| "learning_rate": 9.408388003748829e-06, |
| "loss": 0.5313, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.9512652296157451, |
| "grad_norm": 4.267172336578369, |
| "learning_rate": 9.40545923149016e-06, |
| "loss": 0.5172, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.9559512652296157, |
| "grad_norm": 4.8553009033203125, |
| "learning_rate": 9.402530459231491e-06, |
| "loss": 0.5104, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.9606373008434864, |
| "grad_norm": 6.460834503173828, |
| "learning_rate": 9.399601686972821e-06, |
| "loss": 0.5225, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.9653233364573571, |
| "grad_norm": 27.46290397644043, |
| "learning_rate": 9.396672914714152e-06, |
| "loss": 0.544, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.9700093720712277, |
| "grad_norm": 4.89717435836792, |
| "learning_rate": 9.393744142455483e-06, |
| "loss": 0.5653, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.9746954076850984, |
| "grad_norm": 4.803583145141602, |
| "learning_rate": 9.390815370196814e-06, |
| "loss": 0.5739, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.979381443298969, |
| "grad_norm": 4.121029853820801, |
| "learning_rate": 9.387886597938146e-06, |
| "loss": 0.5192, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.9840674789128397, |
| "grad_norm": 4.464984893798828, |
| "learning_rate": 9.384957825679475e-06, |
| "loss": 0.5393, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.9887535145267105, |
| "grad_norm": 6.364498615264893, |
| "learning_rate": 9.382029053420806e-06, |
| "loss": 0.5764, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.993439550140581, |
| "grad_norm": 3.743790864944458, |
| "learning_rate": 9.379100281162138e-06, |
| "loss": 0.5276, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.9981255857544518, |
| "grad_norm": 4.737389087677002, |
| "learning_rate": 9.376171508903469e-06, |
| "loss": 0.5211, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.0028116213683225, |
| "grad_norm": 3.622758626937866, |
| "learning_rate": 9.3732427366448e-06, |
| "loss": 0.5329, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.007497656982193, |
| "grad_norm": 3.5359978675842285, |
| "learning_rate": 9.37031396438613e-06, |
| "loss": 0.4941, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.0121836925960637, |
| "grad_norm": 4.669582843780518, |
| "learning_rate": 9.36738519212746e-06, |
| "loss": 0.4821, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.0168697282099344, |
| "grad_norm": 3.767122507095337, |
| "learning_rate": 9.364456419868792e-06, |
| "loss": 0.4886, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.021555763823805, |
| "grad_norm": 3.9681687355041504, |
| "learning_rate": 9.361527647610123e-06, |
| "loss": 0.493, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.0262417994376758, |
| "grad_norm": 3.389897108078003, |
| "learning_rate": 9.358598875351454e-06, |
| "loss": 0.4688, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.0309278350515463, |
| "grad_norm": 3.5152347087860107, |
| "learning_rate": 9.355670103092784e-06, |
| "loss": 0.4625, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.035613870665417, |
| "grad_norm": 3.23901629447937, |
| "learning_rate": 9.352741330834115e-06, |
| "loss": 0.5143, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.0402999062792877, |
| "grad_norm": 4.617633819580078, |
| "learning_rate": 9.349812558575446e-06, |
| "loss": 0.4732, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.0449859418931584, |
| "grad_norm": 5.245469570159912, |
| "learning_rate": 9.346883786316777e-06, |
| "loss": 0.5213, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.0496719775070291, |
| "grad_norm": 4.20419454574585, |
| "learning_rate": 9.343955014058108e-06, |
| "loss": 0.5042, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.0543580131208996, |
| "grad_norm": 4.6322102546691895, |
| "learning_rate": 9.341026241799438e-06, |
| "loss": 0.4982, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.0543580131208996, |
| "eval_loss": 0.05779802054166794, |
| "eval_pearson_cosine": 0.7770314842083366, |
| "eval_pearson_dot": 0.6498110843024136, |
| "eval_pearson_euclidean": 0.7709013065859232, |
| "eval_pearson_manhattan": 0.7695278239114174, |
| "eval_runtime": 48.4856, |
| "eval_samples_per_second": 30.937, |
| "eval_spearman_cosine": 0.7783328375480574, |
| "eval_spearman_dot": 0.6551905692522538, |
| "eval_spearman_euclidean": 0.7802862933680744, |
| "eval_spearman_manhattan": 0.7790525675974715, |
| "eval_steps_per_second": 30.937, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.0590440487347703, |
| "grad_norm": 4.474431991577148, |
| "learning_rate": 9.33809746954077e-06, |
| "loss": 0.5227, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.063730084348641, |
| "grad_norm": 4.538947105407715, |
| "learning_rate": 9.3351686972821e-06, |
| "loss": 0.5158, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.0684161199625117, |
| "grad_norm": 6.6143693923950195, |
| "learning_rate": 9.332239925023432e-06, |
| "loss": 0.461, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.0731021555763824, |
| "grad_norm": 4.316189765930176, |
| "learning_rate": 9.329311152764761e-06, |
| "loss": 0.5079, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.077788191190253, |
| "grad_norm": 4.054687976837158, |
| "learning_rate": 9.326382380506092e-06, |
| "loss": 0.5022, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.0824742268041236, |
| "grad_norm": 4.232051849365234, |
| "learning_rate": 9.323453608247423e-06, |
| "loss": 0.5096, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.0871602624179943, |
| "grad_norm": 3.7785236835479736, |
| "learning_rate": 9.320524835988755e-06, |
| "loss": 0.4614, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.091846298031865, |
| "grad_norm": 4.865905284881592, |
| "learning_rate": 9.317596063730086e-06, |
| "loss": 0.5135, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.0965323336457358, |
| "grad_norm": 4.681485176086426, |
| "learning_rate": 9.314667291471417e-06, |
| "loss": 0.5061, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.1012183692596063, |
| "grad_norm": 4.256619453430176, |
| "learning_rate": 9.311738519212747e-06, |
| "loss": 0.4627, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.105904404873477, |
| "grad_norm": 4.459606170654297, |
| "learning_rate": 9.308809746954078e-06, |
| "loss": 0.5171, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.1105904404873477, |
| "grad_norm": 4.008665084838867, |
| "learning_rate": 9.305880974695409e-06, |
| "loss": 0.4422, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.1152764761012184, |
| "grad_norm": 3.674177885055542, |
| "learning_rate": 9.302952202436738e-06, |
| "loss": 0.5233, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.119962511715089, |
| "grad_norm": 4.463940620422363, |
| "learning_rate": 9.30002343017807e-06, |
| "loss": 0.4731, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.1246485473289598, |
| "grad_norm": 3.9289097785949707, |
| "learning_rate": 9.2970946579194e-06, |
| "loss": 0.4869, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.1293345829428303, |
| "grad_norm": 4.097565174102783, |
| "learning_rate": 9.294165885660732e-06, |
| "loss": 0.4594, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.134020618556701, |
| "grad_norm": 4.55318546295166, |
| "learning_rate": 9.291237113402063e-06, |
| "loss": 0.494, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.1387066541705717, |
| "grad_norm": 4.425617694854736, |
| "learning_rate": 9.288308341143394e-06, |
| "loss": 0.4829, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.1433926897844424, |
| "grad_norm": 3.908015489578247, |
| "learning_rate": 9.285379568884726e-06, |
| "loss": 0.4793, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.148078725398313, |
| "grad_norm": 3.7293996810913086, |
| "learning_rate": 9.282450796626055e-06, |
| "loss": 0.5399, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.1527647610121836, |
| "grad_norm": 4.584681034088135, |
| "learning_rate": 9.279522024367386e-06, |
| "loss": 0.4479, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.1574507966260543, |
| "grad_norm": 4.109914302825928, |
| "learning_rate": 9.276593252108716e-06, |
| "loss": 0.4599, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.162136832239925, |
| "grad_norm": 4.446422100067139, |
| "learning_rate": 9.273664479850047e-06, |
| "loss": 0.4727, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.1668228678537957, |
| "grad_norm": 5.975160598754883, |
| "learning_rate": 9.270735707591378e-06, |
| "loss": 0.4509, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.1715089034676662, |
| "grad_norm": 4.379275321960449, |
| "learning_rate": 9.26780693533271e-06, |
| "loss": 0.4828, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.1715089034676662, |
| "eval_loss": 0.05214480683207512, |
| "eval_pearson_cosine": 0.7792755247272061, |
| "eval_pearson_dot": 0.6569300577465214, |
| "eval_pearson_euclidean": 0.7718322585231894, |
| "eval_pearson_manhattan": 0.7703922250718165, |
| "eval_runtime": 47.8089, |
| "eval_samples_per_second": 31.375, |
| "eval_spearman_cosine": 0.7799819701975583, |
| "eval_spearman_dot": 0.662507389274304, |
| "eval_spearman_euclidean": 0.7818437831063969, |
| "eval_spearman_manhattan": 0.7805341558401507, |
| "eval_steps_per_second": 31.375, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.176194939081537, |
| "grad_norm": 3.5287399291992188, |
| "learning_rate": 9.26487816307404e-06, |
| "loss": 0.4591, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.1808809746954076, |
| "grad_norm": 3.277655601501465, |
| "learning_rate": 9.261949390815372e-06, |
| "loss": 0.4479, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.1855670103092784, |
| "grad_norm": 4.732039451599121, |
| "learning_rate": 9.259020618556703e-06, |
| "loss": 0.461, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.190253045923149, |
| "grad_norm": 4.4760966300964355, |
| "learning_rate": 9.256091846298032e-06, |
| "loss": 0.4652, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.1949390815370198, |
| "grad_norm": 7.485498428344727, |
| "learning_rate": 9.253163074039364e-06, |
| "loss": 0.4779, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.1996251171508903, |
| "grad_norm": 3.9956140518188477, |
| "learning_rate": 9.250234301780693e-06, |
| "loss": 0.4567, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.204311152764761, |
| "grad_norm": 3.547563314437866, |
| "learning_rate": 9.247305529522024e-06, |
| "loss": 0.4988, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.2089971883786317, |
| "grad_norm": 5.354389667510986, |
| "learning_rate": 9.244376757263355e-06, |
| "loss": 0.464, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.2136832239925024, |
| "grad_norm": 3.791760206222534, |
| "learning_rate": 9.241447985004687e-06, |
| "loss": 0.4441, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.218369259606373, |
| "grad_norm": 4.77889347076416, |
| "learning_rate": 9.238519212746018e-06, |
| "loss": 0.4655, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.2230552952202436, |
| "grad_norm": 5.804917335510254, |
| "learning_rate": 9.235590440487349e-06, |
| "loss": 0.4912, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.2277413308341143, |
| "grad_norm": 3.841860771179199, |
| "learning_rate": 9.23266166822868e-06, |
| "loss": 0.472, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.232427366447985, |
| "grad_norm": 4.4197540283203125, |
| "learning_rate": 9.22973289597001e-06, |
| "loss": 0.4821, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.2371134020618557, |
| "grad_norm": 5.844490051269531, |
| "learning_rate": 9.226804123711341e-06, |
| "loss": 0.5655, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.2417994376757264, |
| "grad_norm": 3.5442116260528564, |
| "learning_rate": 9.223875351452672e-06, |
| "loss": 0.4532, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.246485473289597, |
| "grad_norm": 5.259571075439453, |
| "learning_rate": 9.220946579194002e-06, |
| "loss": 0.4856, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.2511715089034676, |
| "grad_norm": 4.675846576690674, |
| "learning_rate": 9.218017806935333e-06, |
| "loss": 0.4576, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.2558575445173383, |
| "grad_norm": 5.236482620239258, |
| "learning_rate": 9.215089034676664e-06, |
| "loss": 0.513, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.260543580131209, |
| "grad_norm": 4.658278465270996, |
| "learning_rate": 9.212160262417995e-06, |
| "loss": 0.4734, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.2652296157450795, |
| "grad_norm": 3.7085494995117188, |
| "learning_rate": 9.209231490159326e-06, |
| "loss": 0.5279, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.2699156513589505, |
| "grad_norm": 3.4627673625946045, |
| "learning_rate": 9.206302717900658e-06, |
| "loss": 0.4773, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.274601686972821, |
| "grad_norm": 4.618409633636475, |
| "learning_rate": 9.203373945641987e-06, |
| "loss": 0.4354, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.2792877225866917, |
| "grad_norm": 3.1090590953826904, |
| "learning_rate": 9.200445173383318e-06, |
| "loss": 0.4409, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.2839737582005624, |
| "grad_norm": 4.328725337982178, |
| "learning_rate": 9.19751640112465e-06, |
| "loss": 0.4799, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.2886597938144329, |
| "grad_norm": 3.8362419605255127, |
| "learning_rate": 9.194587628865979e-06, |
| "loss": 0.5062, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.2886597938144329, |
| "eval_loss": 0.05263364687561989, |
| "eval_pearson_cosine": 0.7755555336434341, |
| "eval_pearson_dot": 0.6502184577290961, |
| "eval_pearson_euclidean": 0.7709853609297426, |
| "eval_pearson_manhattan": 0.769572635033791, |
| "eval_runtime": 44.8508, |
| "eval_samples_per_second": 33.444, |
| "eval_spearman_cosine": 0.7765036654281985, |
| "eval_spearman_dot": 0.6558936409143281, |
| "eval_spearman_euclidean": 0.7808945633743188, |
| "eval_spearman_manhattan": 0.7795729380744477, |
| "eval_steps_per_second": 33.444, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.2933458294283038, |
| "grad_norm": 3.6972432136535645, |
| "learning_rate": 9.19165885660731e-06, |
| "loss": 0.488, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.2980318650421743, |
| "grad_norm": 6.73103141784668, |
| "learning_rate": 9.188730084348641e-06, |
| "loss": 0.4553, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.302717900656045, |
| "grad_norm": 4.371028423309326, |
| "learning_rate": 9.185801312089973e-06, |
| "loss": 0.4555, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.3074039362699157, |
| "grad_norm": 3.4788401126861572, |
| "learning_rate": 9.182872539831304e-06, |
| "loss": 0.4561, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.3120899718837864, |
| "grad_norm": 3.832277774810791, |
| "learning_rate": 9.179943767572635e-06, |
| "loss": 0.4838, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.316776007497657, |
| "grad_norm": 3.5579423904418945, |
| "learning_rate": 9.177014995313966e-06, |
| "loss": 0.4404, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.3214620431115276, |
| "grad_norm": 3.7768073081970215, |
| "learning_rate": 9.174086223055296e-06, |
| "loss": 0.4724, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.3261480787253983, |
| "grad_norm": 3.957035779953003, |
| "learning_rate": 9.171157450796627e-06, |
| "loss": 0.471, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.330834114339269, |
| "grad_norm": 3.6035895347595215, |
| "learning_rate": 9.168228678537958e-06, |
| "loss": 0.4645, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.3355201499531397, |
| "grad_norm": 4.358327388763428, |
| "learning_rate": 9.165299906279288e-06, |
| "loss": 0.4301, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.3402061855670104, |
| "grad_norm": 3.4666709899902344, |
| "learning_rate": 9.162371134020619e-06, |
| "loss": 0.4508, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.344892221180881, |
| "grad_norm": 3.912290096282959, |
| "learning_rate": 9.15944236176195e-06, |
| "loss": 0.4379, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.3495782567947516, |
| "grad_norm": 4.305796146392822, |
| "learning_rate": 9.156513589503281e-06, |
| "loss": 0.4194, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.3542642924086223, |
| "grad_norm": 4.231681823730469, |
| "learning_rate": 9.153584817244612e-06, |
| "loss": 0.4017, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.358950328022493, |
| "grad_norm": 4.43821382522583, |
| "learning_rate": 9.150656044985944e-06, |
| "loss": 0.4185, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.3636363636363638, |
| "grad_norm": 4.922164440155029, |
| "learning_rate": 9.147727272727273e-06, |
| "loss": 0.5199, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.3683223992502342, |
| "grad_norm": 4.577489852905273, |
| "learning_rate": 9.144798500468604e-06, |
| "loss": 0.4237, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.373008434864105, |
| "grad_norm": 3.9537651538848877, |
| "learning_rate": 9.141869728209935e-06, |
| "loss": 0.4888, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.3776944704779757, |
| "grad_norm": 4.165870189666748, |
| "learning_rate": 9.138940955951267e-06, |
| "loss": 0.4476, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.3823805060918464, |
| "grad_norm": 4.492893218994141, |
| "learning_rate": 9.136012183692596e-06, |
| "loss": 0.5159, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.387066541705717, |
| "grad_norm": 3.847490072250366, |
| "learning_rate": 9.133083411433927e-06, |
| "loss": 0.4497, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.3917525773195876, |
| "grad_norm": 6.766137599945068, |
| "learning_rate": 9.130154639175258e-06, |
| "loss": 0.4379, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.3964386129334583, |
| "grad_norm": 3.9198007583618164, |
| "learning_rate": 9.12722586691659e-06, |
| "loss": 0.4519, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.401124648547329, |
| "grad_norm": 3.67480731010437, |
| "learning_rate": 9.124297094657921e-06, |
| "loss": 0.4108, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.4058106841611997, |
| "grad_norm": 3.3013832569122314, |
| "learning_rate": 9.12136832239925e-06, |
| "loss": 0.433, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.4058106841611997, |
| "eval_loss": 0.0497601218521595, |
| "eval_pearson_cosine": 0.7834985989633054, |
| "eval_pearson_dot": 0.6669524421664974, |
| "eval_pearson_euclidean": 0.7743874834934843, |
| "eval_pearson_manhattan": 0.7730376146204847, |
| "eval_runtime": 47.8141, |
| "eval_samples_per_second": 31.371, |
| "eval_spearman_cosine": 0.7845889452017747, |
| "eval_spearman_dot": 0.6729435548765089, |
| "eval_spearman_euclidean": 0.784591658726837, |
| "eval_spearman_manhattan": 0.7832975474858643, |
| "eval_steps_per_second": 31.371, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.4104967197750704, |
| "grad_norm": 4.2792487144470215, |
| "learning_rate": 9.118439550140582e-06, |
| "loss": 0.4878, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.415182755388941, |
| "grad_norm": 3.8892383575439453, |
| "learning_rate": 9.115510777881913e-06, |
| "loss": 0.4676, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.4198687910028116, |
| "grad_norm": 5.0008745193481445, |
| "learning_rate": 9.112582005623244e-06, |
| "loss": 0.4729, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.4245548266166823, |
| "grad_norm": 5.607409477233887, |
| "learning_rate": 9.109653233364575e-06, |
| "loss": 0.4762, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.429240862230553, |
| "grad_norm": 3.0340139865875244, |
| "learning_rate": 9.106724461105905e-06, |
| "loss": 0.4438, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.4339268978444237, |
| "grad_norm": 4.310724258422852, |
| "learning_rate": 9.103795688847236e-06, |
| "loss": 0.4499, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.4386129334582942, |
| "grad_norm": 4.481917381286621, |
| "learning_rate": 9.100866916588567e-06, |
| "loss": 0.4493, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.443298969072165, |
| "grad_norm": 4.330621719360352, |
| "learning_rate": 9.097938144329898e-06, |
| "loss": 0.4505, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.4479850046860356, |
| "grad_norm": 4.335081577301025, |
| "learning_rate": 9.095009372071228e-06, |
| "loss": 0.446, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.4526710402999063, |
| "grad_norm": 3.0894672870635986, |
| "learning_rate": 9.092080599812559e-06, |
| "loss": 0.4404, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.457357075913777, |
| "grad_norm": 4.6363983154296875, |
| "learning_rate": 9.08915182755389e-06, |
| "loss": 0.5358, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.4620431115276475, |
| "grad_norm": 3.80387806892395, |
| "learning_rate": 9.086223055295221e-06, |
| "loss": 0.4374, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.4667291471415183, |
| "grad_norm": 3.276442289352417, |
| "learning_rate": 9.083294283036552e-06, |
| "loss": 0.5013, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.471415182755389, |
| "grad_norm": 3.843419075012207, |
| "learning_rate": 9.080365510777884e-06, |
| "loss": 0.4694, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.4761012183692597, |
| "grad_norm": 4.7606730461120605, |
| "learning_rate": 9.077436738519213e-06, |
| "loss": 0.4215, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.4807872539831304, |
| "grad_norm": 3.739225149154663, |
| "learning_rate": 9.074507966260544e-06, |
| "loss": 0.4756, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.4854732895970009, |
| "grad_norm": 3.36938214302063, |
| "learning_rate": 9.071579194001876e-06, |
| "loss": 0.4243, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.4901593252108716, |
| "grad_norm": 6.589993476867676, |
| "learning_rate": 9.068650421743205e-06, |
| "loss": 0.4698, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.4948453608247423, |
| "grad_norm": 3.8416695594787598, |
| "learning_rate": 9.065721649484536e-06, |
| "loss": 0.4964, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.499531396438613, |
| "grad_norm": 4.367741584777832, |
| "learning_rate": 9.062792877225867e-06, |
| "loss": 0.4417, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.5042174320524837, |
| "grad_norm": 3.500617742538452, |
| "learning_rate": 9.059864104967199e-06, |
| "loss": 0.4522, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.5089034676663542, |
| "grad_norm": 3.5349769592285156, |
| "learning_rate": 9.05693533270853e-06, |
| "loss": 0.4393, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.513589503280225, |
| "grad_norm": 3.8469526767730713, |
| "learning_rate": 9.054006560449861e-06, |
| "loss": 0.4453, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.5182755388940956, |
| "grad_norm": 3.209933280944824, |
| "learning_rate": 9.051077788191192e-06, |
| "loss": 0.4599, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.522961574507966, |
| "grad_norm": 3.7976036071777344, |
| "learning_rate": 9.048149015932522e-06, |
| "loss": 0.4373, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.522961574507966, |
| "eval_loss": 0.049798864871263504, |
| "eval_pearson_cosine": 0.7866421286010308, |
| "eval_pearson_dot": 0.6641640853451243, |
| "eval_pearson_euclidean": 0.7777378719378305, |
| "eval_pearson_manhattan": 0.7764827785285746, |
| "eval_runtime": 43.7509, |
| "eval_samples_per_second": 34.285, |
| "eval_spearman_cosine": 0.7870351053050699, |
| "eval_spearman_dot": 0.6708598238937284, |
| "eval_spearman_euclidean": 0.7874683707378692, |
| "eval_spearman_manhattan": 0.7865203522698128, |
| "eval_steps_per_second": 34.285, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.527647610121837, |
| "grad_norm": 4.851262092590332, |
| "learning_rate": 9.045220243673853e-06, |
| "loss": 0.491, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.5323336457357075, |
| "grad_norm": 4.183891773223877, |
| "learning_rate": 9.042291471415184e-06, |
| "loss": 0.453, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.5370196813495782, |
| "grad_norm": 4.280774116516113, |
| "learning_rate": 9.039362699156514e-06, |
| "loss": 0.4413, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.541705716963449, |
| "grad_norm": 4.118307113647461, |
| "learning_rate": 9.036433926897845e-06, |
| "loss": 0.4661, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.5463917525773194, |
| "grad_norm": 5.99712610244751, |
| "learning_rate": 9.033505154639176e-06, |
| "loss": 0.5205, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.5510777881911904, |
| "grad_norm": 4.146691799163818, |
| "learning_rate": 9.030576382380507e-06, |
| "loss": 0.428, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.5557638238050608, |
| "grad_norm": 3.899887800216675, |
| "learning_rate": 9.027647610121838e-06, |
| "loss": 0.4564, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.5604498594189316, |
| "grad_norm": 3.9663302898406982, |
| "learning_rate": 9.02471883786317e-06, |
| "loss": 0.4539, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.5651358950328023, |
| "grad_norm": 3.526458263397217, |
| "learning_rate": 9.021790065604499e-06, |
| "loss": 0.4844, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.569821930646673, |
| "grad_norm": 4.192911624908447, |
| "learning_rate": 9.01886129334583e-06, |
| "loss": 0.4278, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.5745079662605437, |
| "grad_norm": 4.185749530792236, |
| "learning_rate": 9.015932521087161e-06, |
| "loss": 0.4632, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.5791940018744142, |
| "grad_norm": 3.411773204803467, |
| "learning_rate": 9.013003748828491e-06, |
| "loss": 0.436, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.5838800374882849, |
| "grad_norm": 4.467881679534912, |
| "learning_rate": 9.010074976569822e-06, |
| "loss": 0.4133, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.5885660731021556, |
| "grad_norm": 3.77736496925354, |
| "learning_rate": 9.007146204311153e-06, |
| "loss": 0.4452, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.5932521087160263, |
| "grad_norm": 4.084095478057861, |
| "learning_rate": 9.004217432052485e-06, |
| "loss": 0.4605, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.597938144329897, |
| "grad_norm": 3.3393008708953857, |
| "learning_rate": 9.001288659793816e-06, |
| "loss": 0.4157, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.6026241799437675, |
| "grad_norm": 3.096881151199341, |
| "learning_rate": 8.998359887535147e-06, |
| "loss": 0.4478, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.6073102155576382, |
| "grad_norm": 3.0557243824005127, |
| "learning_rate": 8.995431115276478e-06, |
| "loss": 0.4452, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.611996251171509, |
| "grad_norm": 3.7997219562530518, |
| "learning_rate": 8.992502343017808e-06, |
| "loss": 0.4287, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.6166822867853796, |
| "grad_norm": 3.6995465755462646, |
| "learning_rate": 8.989573570759139e-06, |
| "loss": 0.4423, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.6213683223992503, |
| "grad_norm": 4.1384053230285645, |
| "learning_rate": 8.986644798500468e-06, |
| "loss": 0.4563, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.6260543580131208, |
| "grad_norm": 4.637014865875244, |
| "learning_rate": 8.9837160262418e-06, |
| "loss": 0.4538, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.6307403936269915, |
| "grad_norm": 4.30952262878418, |
| "learning_rate": 8.98078725398313e-06, |
| "loss": 0.3993, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.6354264292408622, |
| "grad_norm": 4.746737003326416, |
| "learning_rate": 8.977858481724462e-06, |
| "loss": 0.4274, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.640112464854733, |
| "grad_norm": 3.8592286109924316, |
| "learning_rate": 8.974929709465793e-06, |
| "loss": 0.4066, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.640112464854733, |
| "eval_loss": 0.050406068563461304, |
| "eval_pearson_cosine": 0.7840015528942317, |
| "eval_pearson_dot": 0.659932129633507, |
| "eval_pearson_euclidean": 0.7769297052026758, |
| "eval_pearson_manhattan": 0.7754185185705609, |
| "eval_runtime": 44.0859, |
| "eval_samples_per_second": 34.024, |
| "eval_spearman_cosine": 0.7845451302239834, |
| "eval_spearman_dot": 0.6667296644451466, |
| "eval_spearman_euclidean": 0.7868327314956118, |
| "eval_spearman_manhattan": 0.7856021398727839, |
| "eval_steps_per_second": 34.024, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.6447985004686037, |
| "grad_norm": 5.983098030090332, |
| "learning_rate": 8.972000937207124e-06, |
| "loss": 0.4451, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.6494845360824741, |
| "grad_norm": 4.052550315856934, |
| "learning_rate": 8.969072164948455e-06, |
| "loss": 0.4331, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.6541705716963448, |
| "grad_norm": 3.7970380783081055, |
| "learning_rate": 8.966143392689785e-06, |
| "loss": 0.4427, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.6588566073102156, |
| "grad_norm": 4.695807456970215, |
| "learning_rate": 8.963214620431116e-06, |
| "loss": 0.4522, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.6635426429240863, |
| "grad_norm": 4.41202974319458, |
| "learning_rate": 8.960285848172446e-06, |
| "loss": 0.4275, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.668228678537957, |
| "grad_norm": 5.364877223968506, |
| "learning_rate": 8.957357075913777e-06, |
| "loss": 0.4321, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.6729147141518275, |
| "grad_norm": 3.801132917404175, |
| "learning_rate": 8.954428303655108e-06, |
| "loss": 0.4494, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.6776007497656982, |
| "grad_norm": 4.197866439819336, |
| "learning_rate": 8.95149953139644e-06, |
| "loss": 0.4126, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.6822867853795689, |
| "grad_norm": 5.34595251083374, |
| "learning_rate": 8.94857075913777e-06, |
| "loss": 0.4757, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.6869728209934396, |
| "grad_norm": 4.772789478302002, |
| "learning_rate": 8.945641986879102e-06, |
| "loss": 0.4037, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.6916588566073103, |
| "grad_norm": 4.81839656829834, |
| "learning_rate": 8.942713214620433e-06, |
| "loss": 0.4192, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.6963448922211808, |
| "grad_norm": 3.470919132232666, |
| "learning_rate": 8.939784442361762e-06, |
| "loss": 0.4106, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.7010309278350515, |
| "grad_norm": 3.2051522731781006, |
| "learning_rate": 8.936855670103094e-06, |
| "loss": 0.4162, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.7057169634489222, |
| "grad_norm": 3.8122334480285645, |
| "learning_rate": 8.933926897844423e-06, |
| "loss": 0.4054, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.710402999062793, |
| "grad_norm": 5.07956075668335, |
| "learning_rate": 8.930998125585754e-06, |
| "loss": 0.4164, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.7150890346766636, |
| "grad_norm": 3.754542112350464, |
| "learning_rate": 8.928069353327085e-06, |
| "loss": 0.3703, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.7197750702905341, |
| "grad_norm": 3.4620890617370605, |
| "learning_rate": 8.925140581068417e-06, |
| "loss": 0.4667, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.7244611059044048, |
| "grad_norm": 4.179393768310547, |
| "learning_rate": 8.922211808809748e-06, |
| "loss": 0.4384, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.7291471415182755, |
| "grad_norm": 3.0865719318389893, |
| "learning_rate": 8.919283036551079e-06, |
| "loss": 0.4248, |
| "step": 3690 |
| }, |
| { |
| "epoch": 1.7338331771321462, |
| "grad_norm": 3.9282147884368896, |
| "learning_rate": 8.91635426429241e-06, |
| "loss": 0.4231, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.738519212746017, |
| "grad_norm": 3.9746885299682617, |
| "learning_rate": 8.91342549203374e-06, |
| "loss": 0.4152, |
| "step": 3710 |
| }, |
| { |
| "epoch": 1.7432052483598874, |
| "grad_norm": 3.8340625762939453, |
| "learning_rate": 8.910496719775071e-06, |
| "loss": 0.4458, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.7478912839737581, |
| "grad_norm": 4.861859321594238, |
| "learning_rate": 8.907567947516402e-06, |
| "loss": 0.4274, |
| "step": 3730 |
| }, |
| { |
| "epoch": 1.7525773195876289, |
| "grad_norm": 3.3457283973693848, |
| "learning_rate": 8.904639175257732e-06, |
| "loss": 0.4534, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.7572633552014996, |
| "grad_norm": 4.057953834533691, |
| "learning_rate": 8.901710402999063e-06, |
| "loss": 0.484, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.7572633552014996, |
| "eval_loss": 0.05240313336253166, |
| "eval_pearson_cosine": 0.7879299521989642, |
| "eval_pearson_dot": 0.6605985065084816, |
| "eval_pearson_euclidean": 0.7797438530556207, |
| "eval_pearson_manhattan": 0.778216782480726, |
| "eval_runtime": 44.9916, |
| "eval_samples_per_second": 33.34, |
| "eval_spearman_cosine": 0.7888982276270184, |
| "eval_spearman_dot": 0.6669965792210436, |
| "eval_spearman_euclidean": 0.7899037728263932, |
| "eval_spearman_manhattan": 0.7886320032383264, |
| "eval_steps_per_second": 33.34, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.7619493908153703, |
| "grad_norm": 3.281102418899536, |
| "learning_rate": 8.898781630740394e-06, |
| "loss": 0.4074, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.7666354264292408, |
| "grad_norm": 4.710203170776367, |
| "learning_rate": 8.895852858481725e-06, |
| "loss": 0.4537, |
| "step": 3770 |
| }, |
| { |
| "epoch": 1.7713214620431117, |
| "grad_norm": 4.636346817016602, |
| "learning_rate": 8.892924086223056e-06, |
| "loss": 0.4348, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.7760074976569822, |
| "grad_norm": 4.518571376800537, |
| "learning_rate": 8.889995313964388e-06, |
| "loss": 0.4515, |
| "step": 3790 |
| }, |
| { |
| "epoch": 1.780693533270853, |
| "grad_norm": 4.0576066970825195, |
| "learning_rate": 8.887066541705717e-06, |
| "loss": 0.4276, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.7853795688847236, |
| "grad_norm": 5.657445430755615, |
| "learning_rate": 8.884137769447048e-06, |
| "loss": 0.4277, |
| "step": 3810 |
| }, |
| { |
| "epoch": 1.790065604498594, |
| "grad_norm": 5.393405437469482, |
| "learning_rate": 8.88120899718838e-06, |
| "loss": 0.428, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.794751640112465, |
| "grad_norm": 4.101112365722656, |
| "learning_rate": 8.87828022492971e-06, |
| "loss": 0.4489, |
| "step": 3830 |
| }, |
| { |
| "epoch": 1.7994376757263355, |
| "grad_norm": 3.531888246536255, |
| "learning_rate": 8.87535145267104e-06, |
| "loss": 0.3673, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.8041237113402062, |
| "grad_norm": 3.4490315914154053, |
| "learning_rate": 8.872422680412371e-06, |
| "loss": 0.4059, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.808809746954077, |
| "grad_norm": 3.034252643585205, |
| "learning_rate": 8.869493908153702e-06, |
| "loss": 0.3832, |
| "step": 3860 |
| }, |
| { |
| "epoch": 1.8134957825679474, |
| "grad_norm": 4.064283847808838, |
| "learning_rate": 8.866565135895034e-06, |
| "loss": 0.4704, |
| "step": 3870 |
| }, |
| { |
| "epoch": 1.8181818181818183, |
| "grad_norm": 3.2689194679260254, |
| "learning_rate": 8.863636363636365e-06, |
| "loss": 0.4428, |
| "step": 3880 |
| }, |
| { |
| "epoch": 1.8228678537956888, |
| "grad_norm": 3.173530101776123, |
| "learning_rate": 8.860707591377694e-06, |
| "loss": 0.4283, |
| "step": 3890 |
| }, |
| { |
| "epoch": 1.8275538894095595, |
| "grad_norm": 3.638122081756592, |
| "learning_rate": 8.857778819119026e-06, |
| "loss": 0.4225, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.8322399250234302, |
| "grad_norm": 3.636679172515869, |
| "learning_rate": 8.854850046860357e-06, |
| "loss": 0.4154, |
| "step": 3910 |
| }, |
| { |
| "epoch": 1.8369259606373007, |
| "grad_norm": 3.810847520828247, |
| "learning_rate": 8.851921274601688e-06, |
| "loss": 0.3931, |
| "step": 3920 |
| }, |
| { |
| "epoch": 1.8416119962511717, |
| "grad_norm": 3.7469394207000732, |
| "learning_rate": 8.848992502343019e-06, |
| "loss": 0.4472, |
| "step": 3930 |
| }, |
| { |
| "epoch": 1.8462980318650422, |
| "grad_norm": 4.962492942810059, |
| "learning_rate": 8.846063730084349e-06, |
| "loss": 0.4324, |
| "step": 3940 |
| }, |
| { |
| "epoch": 1.8509840674789129, |
| "grad_norm": 3.4641172885894775, |
| "learning_rate": 8.84313495782568e-06, |
| "loss": 0.4234, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.8556701030927836, |
| "grad_norm": 3.8601555824279785, |
| "learning_rate": 8.840206185567011e-06, |
| "loss": 0.4045, |
| "step": 3960 |
| }, |
| { |
| "epoch": 1.860356138706654, |
| "grad_norm": 6.290759086608887, |
| "learning_rate": 8.837277413308342e-06, |
| "loss": 0.4655, |
| "step": 3970 |
| }, |
| { |
| "epoch": 1.865042174320525, |
| "grad_norm": 3.5882256031036377, |
| "learning_rate": 8.834348641049673e-06, |
| "loss": 0.4298, |
| "step": 3980 |
| }, |
| { |
| "epoch": 1.8697282099343955, |
| "grad_norm": 3.133535623550415, |
| "learning_rate": 8.831419868791003e-06, |
| "loss": 0.4508, |
| "step": 3990 |
| }, |
| { |
| "epoch": 1.8744142455482662, |
| "grad_norm": 3.220383644104004, |
| "learning_rate": 8.828491096532334e-06, |
| "loss": 0.4348, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.8744142455482662, |
| "eval_loss": 0.04981923848390579, |
| "eval_pearson_cosine": 0.790612878761543, |
| "eval_pearson_dot": 0.6612786229229286, |
| "eval_pearson_euclidean": 0.7799249806775554, |
| "eval_pearson_manhattan": 0.7784476870813819, |
| "eval_runtime": 45.9371, |
| "eval_samples_per_second": 32.653, |
| "eval_spearman_cosine": 0.7908100570922554, |
| "eval_spearman_dot": 0.6689224987064551, |
| "eval_spearman_euclidean": 0.7902520878335856, |
| "eval_spearman_manhattan": 0.7892503488739743, |
| "eval_steps_per_second": 32.653, |
| "step": 4000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 4268, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|