{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "grad_norm": 1.0315879583358765,
      "learning_rate": 9.6e-05,
      "loss": 0.8325,
      "step": 25
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.20474183559417725,
      "learning_rate": 0.00019600000000000002,
      "loss": 0.4556,
      "step": 50
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.22688248753547668,
      "learning_rate": 0.000296,
      "loss": 0.4015,
      "step": 75
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.19172126054763794,
      "learning_rate": 0.00039600000000000003,
      "loss": 0.3645,
      "step": 100
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.15229463577270508,
      "learning_rate": 0.000496,
      "loss": 0.3602,
      "step": 125
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.20351602137088776,
      "learning_rate": 0.000596,
      "loss": 0.3475,
      "step": 150
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.19812420010566711,
      "learning_rate": 0.000696,
      "loss": 0.345,
      "step": 175
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.18347425758838654,
      "learning_rate": 0.000796,
      "loss": 0.3327,
      "step": 200
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2595237195491791,
      "learning_rate": 0.000896,
      "loss": 0.3302,
      "step": 225
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.2259773463010788,
      "learning_rate": 0.000996,
      "loss": 0.3392,
      "step": 250
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.2506314516067505,
      "learning_rate": 0.0009997192908557321,
      "loss": 0.328,
      "step": 275
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.22288699448108673,
      "learning_rate": 0.000998830238119205,
      "loss": 0.3248,
      "step": 300
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.29342198371887207,
      "learning_rate": 0.000997333437576437,
      "loss": 0.3206,
      "step": 325
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.23638910055160522,
      "learning_rate": 0.0009952307128483257,
      "loss": 0.3119,
      "step": 350
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.20430760085582733,
      "learning_rate": 0.0009925246257810518,
      "loss": 0.3029,
      "step": 375
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.2517499327659607,
      "learning_rate": 0.0009892184733248665,
      "loss": 0.3218,
      "step": 400
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.2284773737192154,
      "learning_rate": 0.0009853162835172637,
      "loss": 0.2969,
      "step": 425
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.26538360118865967,
      "learning_rate": 0.0009808228105754376,
      "loss": 0.3134,
      "step": 450
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.24021762609481812,
      "learning_rate": 0.0009757435291040016,
      "loss": 0.3076,
      "step": 475
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.27008429169654846,
      "learning_rate": 0.0009700846274250251,
      "loss": 0.2964,
      "step": 500
    },
    {
      "epoch": 0.4,
      "eval_loss": 0.2879575788974762,
      "eval_runtime": 18.9021,
      "eval_samples_per_second": 54.121,
      "eval_steps_per_second": 0.846,
      "step": 500
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.2501317858695984,
      "learning_rate": 0.000963853000038517,
      "loss": 0.2981,
      "step": 525
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.19082719087600708,
      "learning_rate": 0.0009570562392225395,
      "loss": 0.2809,
      "step": 550
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.24488045275211334,
      "learning_rate": 0.0009497026257831855,
      "loss": 0.291,
      "step": 575
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.21422015130519867,
      "learning_rate": 0.0009418011189656941,
      "loss": 0.2851,
      "step": 600
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.19914306700229645,
      "learning_rate": 0.0009333613455389882,
      "loss": 0.2841,
      "step": 625
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.2367725670337677,
      "learning_rate": 0.000924393588066941,
      "loss": 0.2879,
      "step": 650
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.24288254976272583,
      "learning_rate": 0.0009149087723806549,
      "loss": 0.2986,
      "step": 675
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.22942474484443665,
      "learning_rate": 0.0009049184542670199,
      "loss": 0.2842,
      "step": 700
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.23413586616516113,
      "learning_rate": 0.0008944348053897671,
      "loss": 0.2925,
      "step": 725
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.22139714658260345,
      "learning_rate": 0.0008834705984601709,
      "loss": 0.2873,
      "step": 750
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2082182914018631,
      "learning_rate": 0.0008720391916754683,
      "loss": 0.2712,
      "step": 775
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.24570266902446747,
      "learning_rate": 0.0008601545124439535,
      "loss": 0.2813,
      "step": 800
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.20145930349826813,
      "learning_rate": 0.0008478310404165754,
      "loss": 0.2678,
      "step": 825
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.1796109676361084,
      "learning_rate": 0.0008350837898457143,
      "loss": 0.2732,
      "step": 850
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.22070881724357605,
      "learning_rate": 0.0008219282912926269,
      "loss": 0.2744,
      "step": 875
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.18907737731933594,
      "learning_rate": 0.0008083805727058513,
      "loss": 0.2834,
      "step": 900
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.1631525754928589,
      "learning_rate": 0.0007944571398936193,
      "loss": 0.2615,
      "step": 925
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.1881309598684311,
      "learning_rate": 0.0007801749564140723,
      "loss": 0.2732,
      "step": 950
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.19089096784591675,
      "learning_rate": 0.0007655514229077783,
      "loss": 0.2705,
      "step": 975
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.17820408940315247,
      "learning_rate": 0.0007506043558977322,
      "loss": 0.2716,
      "step": 1000
    },
    {
      "epoch": 0.8,
      "eval_loss": 0.2520262598991394,
      "eval_runtime": 18.7237,
      "eval_samples_per_second": 54.637,
      "eval_steps_per_second": 0.855,
      "step": 1000
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.1798166185617447,
      "learning_rate": 0.0007353519660826664,
      "loss": 0.2546,
      "step": 1025
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.21515925228595734,
      "learning_rate": 0.00071981283615012,
      "loss": 0.2647,
      "step": 1050
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.1794830709695816,
      "learning_rate": 0.0007040058981362964,
      "loss": 0.2577,
      "step": 1075
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.18966132402420044,
      "learning_rate": 0.0006879504103602934,
      "loss": 0.2594,
      "step": 1100
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.21163836121559143,
      "learning_rate": 0.0006716659339608077,
      "loss": 0.2725,
      "step": 1125
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.17575646936893463,
      "learning_rate": 0.0006551723090639006,
      "loss": 0.2483,
      "step": 1150
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.17414060235023499,
      "learning_rate": 0.0006384896306108612,
      "loss": 0.2555,
      "step": 1175
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.16540151834487915,
      "learning_rate": 0.0006216382238756146,
      "loss": 0.2427,
      "step": 1200
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.16461016237735748,
      "learning_rate": 0.0006046386197015076,
      "loss": 0.251,
      "step": 1225
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.16811122000217438,
      "learning_rate": 0.0005875115294876381,
      "loss": 0.2451,
      "step": 1250
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.17021119594573975,
      "learning_rate": 0.0005702778199552054,
      "loss": 0.2227,
      "step": 1275
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.16884687542915344,
      "learning_rate": 0.000552958487724626,
      "loss": 0.2164,
      "step": 1300
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.17195427417755127,
      "learning_rate": 0.0005355746337343836,
      "loss": 0.2406,
      "step": 1325
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.15978871285915375,
      "learning_rate": 0.0005181474375327879,
      "loss": 0.2138,
      "step": 1350
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.1938290148973465,
      "learning_rate": 0.0005006981314739573,
      "loss": 0.2322,
      "step": 1375
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.1653754562139511,
      "learning_rate": 0.00048324797484946424,
      "loss": 0.2285,
      "step": 1400
    },
    {
      "epoch": 1.1400000000000001,
      "grad_norm": 0.16597716510295868,
      "learning_rate": 0.0004658182279871657,
      "loss": 0.2195,
      "step": 1425
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.1771618276834488,
      "learning_rate": 0.00044843012634876645,
      "loss": 0.2311,
      "step": 1450
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.1888245791196823,
      "learning_rate": 0.000431104854657681,
      "loss": 0.2224,
      "step": 1475
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.19004148244857788,
      "learning_rate": 0.0004138635210887117,
      "loss": 0.2261,
      "step": 1500
    },
    {
      "epoch": 1.2,
      "eval_loss": 0.23447048664093018,
      "eval_runtime": 18.7321,
      "eval_samples_per_second": 54.612,
      "eval_steps_per_second": 0.854,
      "step": 1500
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.15263979136943817,
      "learning_rate": 0.0003967271315509884,
      "loss": 0.2227,
      "step": 1525
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.16230420768260956,
      "learning_rate": 0.0003797165640955041,
      "loss": 0.218,
      "step": 1550
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.1780393123626709,
      "learning_rate": 0.0003628525434784268,
      "loss": 0.2228,
      "step": 1575
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.1682538390159607,
      "learning_rate": 0.0003461556159111748,
      "loss": 0.2274,
      "step": 1600
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.17813384532928467,
      "learning_rate": 0.0003296461240280242,
      "loss": 0.2095,
      "step": 1625
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.17569252848625183,
      "learning_rate": 0.00031334418210174266,
      "loss": 0.2325,
      "step": 1650
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.14148516952991486,
      "learning_rate": 0.0002972696515374455,
      "loss": 0.2152,
      "step": 1675
    },
    {
      "epoch": 1.3599999999999999,
      "grad_norm": 0.14034417271614075,
      "learning_rate": 0.00028144211667453366,
      "loss": 0.2189,
      "step": 1700
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.16050834953784943,
      "learning_rate": 0.00026588086092619277,
      "loss": 0.2252,
      "step": 1725
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.1896321177482605,
      "learning_rate": 0.00025060484328552466,
      "loss": 0.2219,
      "step": 1750
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.16782040894031525,
      "learning_rate": 0.00023563267522693415,
      "loss": 0.2139,
      "step": 1775
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.1926344931125641,
      "learning_rate": 0.0002209825980309151,
      "loss": 0.2211,
      "step": 1800
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.16422989964485168,
      "learning_rate": 0.00020667246055985938,
      "loss": 0.2215,
      "step": 1825
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.16321636736392975,
      "learning_rate": 0.00019271969751196778,
      "loss": 0.2066,
      "step": 1850
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.15386155247688293,
      "learning_rate": 0.00017914130817975592,
      "loss": 0.2179,
      "step": 1875
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.1478998363018036,
      "learning_rate": 0.00016595383573903412,
      "loss": 0.2117,
      "step": 1900
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.16174481809139252,
      "learning_rate": 0.0001531733470935976,
      "loss": 0.2149,
      "step": 1925
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.13468210399150848,
      "learning_rate": 0.00014081541330017704,
      "loss": 0.2037,
      "step": 1950
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.1549725979566574,
      "learning_rate": 0.00012889509059750602,
      "loss": 0.2096,
      "step": 1975
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.15235114097595215,
      "learning_rate": 0.00011742690206261292,
      "loss": 0.215,
      "step": 2000
    },
    {
      "epoch": 1.6,
      "eval_loss": 0.22170303761959076,
      "eval_runtime": 18.729,
      "eval_samples_per_second": 54.621,
      "eval_steps_per_second": 0.854,
      "step": 2000
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.16818706691265106,
      "learning_rate": 0.0001064248199166884,
      "loss": 0.2024,
      "step": 2025
    },
    {
      "epoch": 1.6400000000000001,
      "grad_norm": 0.1615205854177475,
      "learning_rate": 9.590224850208645e-05,
      "loss": 0.2116,
      "step": 2050
    },
    {
      "epoch": 1.6600000000000001,
      "grad_norm": 0.15136006474494934,
      "learning_rate": 8.587200795119792e-05,
      "loss": 0.2159,
      "step": 2075
    },
    {
      "epoch": 1.6800000000000002,
      "grad_norm": 0.17629875242710114,
      "learning_rate": 7.634631856709389e-05,
      "loss": 0.2173,
      "step": 2100
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.17757442593574524,
      "learning_rate": 6.733678593496901e-05,
      "loss": 0.2165,
      "step": 2125
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.17096401751041412,
      "learning_rate": 5.885438678252342e-05,
      "loss": 0.2104,
      "step": 2150
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.1552375704050064,
      "learning_rate": 5.0909455606510726e-05,
      "loss": 0.2065,
      "step": 2175
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.2247905433177948,
      "learning_rate": 4.3511672081746386e-05,
      "loss": 0.211,
      "step": 2200
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.15804603695869446,
      "learning_rate": 3.667004926791395e-05,
      "loss": 0.2135,
      "step": 2225
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.15289010107517242,
      "learning_rate": 3.0392922628540875e-05,
      "loss": 0.2079,
      "step": 2250
    },
    {
      "epoch": 1.8199999999999998,
      "grad_norm": 0.148426815867424,
      "learning_rate": 2.468793987551998e-05,
      "loss": 0.2048,
      "step": 2275
    },
    {
      "epoch": 1.8399999999999999,
      "grad_norm": 0.1801871359348297,
      "learning_rate": 1.9562051651550784e-05,
      "loss": 0.1998,
      "step": 2300
    },
    {
      "epoch": 1.8599999999999999,
      "grad_norm": 0.15381543338298798,
      "learning_rate": 1.5021503061851349e-05,
      "loss": 0.2127,
      "step": 2325
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.1602175533771515,
      "learning_rate": 1.1071826065460589e-05,
      "loss": 0.1991,
      "step": 2350
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.17019180953502655,
      "learning_rate": 7.717832735397334e-06,
      "loss": 0.2036,
      "step": 2375
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.19569824635982513,
      "learning_rate": 4.963609395891299e-06,
      "loss": 0.2034,
      "step": 2400
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.15439940989017487,
      "learning_rate": 2.81251164382601e-06,
      "loss": 0.2063,
      "step": 2425
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.1678876131772995,
      "learning_rate": 1.267160260461253e-06,
      "loss": 0.1939,
      "step": 2450
    },
    {
      "epoch": 1.98,
      "grad_norm": 0.14893606305122375,
      "learning_rate": 3.2943801841439634e-07,
      "loss": 0.203,
      "step": 2475
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.13442523777484894,
      "learning_rate": 4.873877924582715e-10,
      "loss": 0.2036,
      "step": 2500
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.21816210448741913,
      "eval_runtime": 18.7265,
      "eval_samples_per_second": 54.629,
      "eval_steps_per_second": 0.854,
      "step": 2500
    },
    {
      "epoch": 2.0,
      "step": 2500,
      "total_flos": 1.62588235137024e+18,
      "train_loss": 0.26149349727630616,
      "train_runtime": 2422.0842,
      "train_samples_per_second": 33.029,
      "train_steps_per_second": 1.032
    }
  ],
  "logging_steps": 25,
  "max_steps": 2500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.62588235137024e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}