Image-Text-to-Text
PEFT
Safetensors
laboratory
protocol-conditioned-action-prediction
lora
qwen
long-horizon-planning
conversational
Instructions to use Stanford-CongLab/LabHorizon-Model with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use Stanford-CongLab/LabHorizon-Model with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3.6-35B-A3B")
model = PeftModel.from_pretrained(base_model, "Stanford-CongLab/LabHorizon-Model")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": 700, | |
| "best_metric": 0.44259119033813477, | |
| "best_model_checkpoint": "/data/taoyong/LabOS/QWEN-36/checkpoints/qwen3.6-35b-a3b-lora-lf/checkpoint-700", | |
| "epoch": 10.0, | |
| "eval_steps": 100, | |
| "global_step": 2500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 1.3030611276626587, | |
| "learning_rate": 3.6e-06, | |
| "loss": 1.1145790100097657, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 1.540786623954773, | |
| "learning_rate": 7.6e-06, | |
| "loss": 1.2167404174804688, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 1.0591915845870972, | |
| "learning_rate": 1.16e-05, | |
| "loss": 1.0437713623046876, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.6695119142532349, | |
| "learning_rate": 1.56e-05, | |
| "loss": 0.9282869338989258, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.7912387847900391, | |
| "learning_rate": 1.9600000000000002e-05, | |
| "loss": 0.8799624443054199, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.7810359001159668, | |
| "learning_rate": 2.36e-05, | |
| "loss": 0.7062759399414062, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 0.7185921669006348, | |
| "learning_rate": 2.7600000000000003e-05, | |
| "loss": 0.7228042602539062, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.7974339723587036, | |
| "learning_rate": 3.16e-05, | |
| "loss": 0.6257906913757324, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 0.7850703597068787, | |
| "learning_rate": 3.56e-05, | |
| "loss": 0.5399329185485839, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.7295215129852295, | |
| "learning_rate": 3.960000000000001e-05, | |
| "loss": 0.5184461116790772, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 0.5476460456848145, | |
| "eval_runtime": 21.5181, | |
| "eval_samples_per_second": 18.589, | |
| "eval_steps_per_second": 3.114, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 1.0682953596115112, | |
| "learning_rate": 4.36e-05, | |
| "loss": 0.5210700988769531, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.9108087420463562, | |
| "learning_rate": 4.76e-05, | |
| "loss": 0.5155693531036377, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.0037930011749268, | |
| "learning_rate": 5.16e-05, | |
| "loss": 0.45534143447875974, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.9430785179138184, | |
| "learning_rate": 5.560000000000001e-05, | |
| "loss": 0.45524797439575193, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.9689427614212036, | |
| "learning_rate": 5.96e-05, | |
| "loss": 0.47152209281921387, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.7584393620491028, | |
| "learning_rate": 6.36e-05, | |
| "loss": 0.4532940864562988, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 0.7581620216369629, | |
| "learning_rate": 6.76e-05, | |
| "loss": 0.48988704681396483, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.9882776141166687, | |
| "learning_rate": 7.16e-05, | |
| "loss": 0.46865572929382326, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.743236780166626, | |
| "learning_rate": 7.560000000000001e-05, | |
| "loss": 0.45577139854431153, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.6103836894035339, | |
| "learning_rate": 7.960000000000001e-05, | |
| "loss": 0.4559042453765869, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 0.485470175743103, | |
| "eval_runtime": 17.4199, | |
| "eval_samples_per_second": 22.962, | |
| "eval_steps_per_second": 3.846, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.8245580792427063, | |
| "learning_rate": 8.36e-05, | |
| "loss": 0.45926451683044434, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.6920369267463684, | |
| "learning_rate": 8.76e-05, | |
| "loss": 0.4545453548431396, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 0.6936920881271362, | |
| "learning_rate": 9.16e-05, | |
| "loss": 0.47637343406677246, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.6694210767745972, | |
| "learning_rate": 9.56e-05, | |
| "loss": 0.43120541572570803, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.583095133304596, | |
| "learning_rate": 9.960000000000001e-05, | |
| "loss": 0.4153712272644043, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 0.6926116943359375, | |
| "learning_rate": 9.999605221019081e-05, | |
| "loss": 0.44300012588500975, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 0.761324405670166, | |
| "learning_rate": 9.998240632972073e-05, | |
| "loss": 0.462084436416626, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 0.5191273093223572, | |
| "learning_rate": 9.995901628010196e-05, | |
| "loss": 0.39808471202850343, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 0.8463711738586426, | |
| "learning_rate": 9.9925886621271e-05, | |
| "loss": 0.423044490814209, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.8373249769210815, | |
| "learning_rate": 9.98830238119205e-05, | |
| "loss": 0.41622562408447267, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_loss": 0.4695434272289276, | |
| "eval_runtime": 19.2419, | |
| "eval_samples_per_second": 20.788, | |
| "eval_steps_per_second": 3.482, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 0.6290304064750671, | |
| "learning_rate": 9.983043620824005e-05, | |
| "loss": 0.4166346549987793, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 0.6189863681793213, | |
| "learning_rate": 9.97681340622872e-05, | |
| "loss": 0.43734130859375, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 0.5579029321670532, | |
| "learning_rate": 9.969612951998874e-05, | |
| "loss": 0.3747305631637573, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.3599999999999999, | |
| "grad_norm": 1.1675549745559692, | |
| "learning_rate": 9.961443661877289e-05, | |
| "loss": 0.42578792572021484, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.6578675508499146, | |
| "learning_rate": 9.952307128483256e-05, | |
| "loss": 0.39537777900695803, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 0.8092941045761108, | |
| "learning_rate": 9.942205133002068e-05, | |
| "loss": 0.4084367275238037, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 0.6226063370704651, | |
| "learning_rate": 9.931139644837754e-05, | |
| "loss": 0.3781426906585693, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 0.7148721218109131, | |
| "learning_rate": 9.919112821229163e-05, | |
| "loss": 0.3952002048492432, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 0.5743547081947327, | |
| "learning_rate": 9.906127006829384e-05, | |
| "loss": 0.4087832927703857, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.6315461993217468, | |
| "learning_rate": 9.892184733248666e-05, | |
| "loss": 0.3861570119857788, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_loss": 0.45406103134155273, | |
| "eval_runtime": 19.7154, | |
| "eval_samples_per_second": 20.289, | |
| "eval_steps_per_second": 3.398, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.6400000000000001, | |
| "grad_norm": 0.6243694424629211, | |
| "learning_rate": 9.877288718560866e-05, | |
| "loss": 0.39033331871032717, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.6800000000000002, | |
| "grad_norm": 0.6677294969558716, | |
| "learning_rate": 9.861441866773564e-05, | |
| "loss": 0.43663845062255857, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 0.6460554599761963, | |
| "learning_rate": 9.844647267261916e-05, | |
| "loss": 0.43364706039428713, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 0.570160984992981, | |
| "learning_rate": 9.82690819416637e-05, | |
| "loss": 0.409498929977417, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.5696760416030884, | |
| "learning_rate": 9.808228105754376e-05, | |
| "loss": 0.4264820098876953, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.8399999999999999, | |
| "grad_norm": 0.583260715007782, | |
| "learning_rate": 9.788610643746184e-05, | |
| "loss": 0.417040491104126, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 0.6025984287261963, | |
| "learning_rate": 9.76805963260488e-05, | |
| "loss": 0.3749807357788086, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 0.5953373312950134, | |
| "learning_rate": 9.746579078790807e-05, | |
| "loss": 0.4022481918334961, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 0.4357820153236389, | |
| "learning_rate": 9.724173169980491e-05, | |
| "loss": 0.38319835662841795, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.5152677297592163, | |
| "learning_rate": 9.700846274250251e-05, | |
| "loss": 0.4122174263000488, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.44415727257728577, | |
| "eval_runtime": 18.9015, | |
| "eval_samples_per_second": 21.162, | |
| "eval_steps_per_second": 3.545, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 0.38848409056663513, | |
| "learning_rate": 9.676602939224629e-05, | |
| "loss": 0.3524669408798218, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 0.5285012125968933, | |
| "learning_rate": 9.651447891189825e-05, | |
| "loss": 0.3717231273651123, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 0.6452465653419495, | |
| "learning_rate": 9.62538603417229e-05, | |
| "loss": 0.40065832138061525, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.48196467757225037, | |
| "learning_rate": 9.598422448982696e-05, | |
| "loss": 0.33635973930358887, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.563376247882843, | |
| "learning_rate": 9.570562392225396e-05, | |
| "loss": 0.3708656787872314, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 0.6459429860115051, | |
| "learning_rate": 9.541811295273656e-05, | |
| "loss": 0.35284056663513186, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.2800000000000002, | |
| "grad_norm": 0.5247339606285095, | |
| "learning_rate": 9.512174763210797e-05, | |
| "loss": 0.3429510831832886, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 0.5456256866455078, | |
| "learning_rate": 9.481658573737465e-05, | |
| "loss": 0.36770102977752683, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 0.5435087084770203, | |
| "learning_rate": 9.450268676045262e-05, | |
| "loss": 0.3684037208557129, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.5584478974342346, | |
| "learning_rate": 9.418011189656941e-05, | |
| "loss": 0.3221792697906494, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_loss": 0.44748273491859436, | |
| "eval_runtime": 18.8521, | |
| "eval_samples_per_second": 21.218, | |
| "eval_steps_per_second": 3.554, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 0.7217129468917847, | |
| "learning_rate": 9.384892403233384e-05, | |
| "loss": 0.40174164772033694, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 0.5068971514701843, | |
| "learning_rate": 9.35091877334763e-05, | |
| "loss": 0.3701002836227417, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 0.4331487715244293, | |
| "learning_rate": 9.316096923226135e-05, | |
| "loss": 0.3759175777435303, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 0.5161293148994446, | |
| "learning_rate": 9.28043364145758e-05, | |
| "loss": 0.3581662178039551, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 0.709299623966217, | |
| "learning_rate": 9.24393588066941e-05, | |
| "loss": 0.35065665245056155, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 0.6004891991615295, | |
| "learning_rate": 9.206610756172402e-05, | |
| "loss": 0.36879355907440187, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 0.4662474989891052, | |
| "learning_rate": 9.168465544573536e-05, | |
| "loss": 0.3592060565948486, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.7199999999999998, | |
| "grad_norm": 0.5826489329338074, | |
| "learning_rate": 9.129507682357394e-05, | |
| "loss": 0.36156315803527833, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 0.48988744616508484, | |
| "learning_rate": 9.089744764436403e-05, | |
| "loss": 0.34445748329162595, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.4443361163139343, | |
| "learning_rate": 9.049184542670199e-05, | |
| "loss": 0.3526463985443115, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "eval_loss": 0.44259119033813477, | |
| "eval_runtime": 16.8228, | |
| "eval_samples_per_second": 23.777, | |
| "eval_steps_per_second": 3.983, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 0.5471161007881165, | |
| "learning_rate": 9.007834924354383e-05, | |
| "loss": 0.3458081245422363, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 0.5264748930931091, | |
| "learning_rate": 8.965703970678974e-05, | |
| "loss": 0.3651163101196289, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 0.48987507820129395, | |
| "learning_rate": 8.922799895156867e-05, | |
| "loss": 0.3218229293823242, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 0.5640589594841003, | |
| "learning_rate": 8.879131062022598e-05, | |
| "loss": 0.3561582088470459, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.7934619784355164, | |
| "learning_rate": 8.834705984601708e-05, | |
| "loss": 0.36128854751586914, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "grad_norm": 1.0869489908218384, | |
| "learning_rate": 8.789533323651066e-05, | |
| "loss": 0.31422438621521, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "grad_norm": 0.4695897102355957, | |
| "learning_rate": 8.74362188567043e-05, | |
| "loss": 0.29355826377868655, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "grad_norm": 0.5532680153846741, | |
| "learning_rate": 8.696980621185602e-05, | |
| "loss": 0.3185117721557617, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "grad_norm": 0.5760806202888489, | |
| "learning_rate": 8.649618623003508e-05, | |
| "loss": 0.28971233367919924, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 0.5517900586128235, | |
| "learning_rate": 8.601545124439535e-05, | |
| "loss": 0.3055370092391968, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "eval_loss": 0.4529191255569458, | |
| "eval_runtime": 18.5382, | |
| "eval_samples_per_second": 21.577, | |
| "eval_steps_per_second": 3.614, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "grad_norm": 0.5356678366661072, | |
| "learning_rate": 8.552769497517482e-05, | |
| "loss": 0.28035550117492675, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 3.2800000000000002, | |
| "grad_norm": 0.5985352993011475, | |
| "learning_rate": 8.503301251142459e-05, | |
| "loss": 0.3199602603912354, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "grad_norm": 0.5187913179397583, | |
| "learning_rate": 8.453150029247114e-05, | |
| "loss": 0.29444499015808107, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "grad_norm": 0.5703292489051819, | |
| "learning_rate": 8.402325608911526e-05, | |
| "loss": 0.30467259883880615, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "grad_norm": 0.9323157072067261, | |
| "learning_rate": 8.350837898457143e-05, | |
| "loss": 0.3117033004760742, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 3.44, | |
| "grad_norm": 0.628546953201294, | |
| "learning_rate": 8.298696935515132e-05, | |
| "loss": 0.34261503219604494, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "grad_norm": 0.5379561185836792, | |
| "learning_rate": 8.245912885069531e-05, | |
| "loss": 0.3159458637237549, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "grad_norm": 0.6575730443000793, | |
| "learning_rate": 8.192496037475562e-05, | |
| "loss": 0.2982481002807617, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "grad_norm": 0.5830497145652771, | |
| "learning_rate": 8.138456806453503e-05, | |
| "loss": 0.3232215404510498, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 0.5474710464477539, | |
| "learning_rate": 8.083805727058513e-05, | |
| "loss": 0.3305091381072998, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "eval_loss": 0.44760578870773315, | |
| "eval_runtime": 19.5159, | |
| "eval_samples_per_second": 20.496, | |
| "eval_steps_per_second": 3.433, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "grad_norm": 0.5096336007118225, | |
| "learning_rate": 8.028553453626808e-05, | |
| "loss": 0.35752732753753663, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 3.68, | |
| "grad_norm": 0.5023341774940491, | |
| "learning_rate": 7.972710757698567e-05, | |
| "loss": 0.3292932271957397, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 3.7199999999999998, | |
| "grad_norm": 0.5277951955795288, | |
| "learning_rate": 7.916288525918007e-05, | |
| "loss": 0.28986682891845705, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "grad_norm": 0.600412905216217, | |
| "learning_rate": 7.859297757911013e-05, | |
| "loss": 0.3027395725250244, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 0.6396210193634033, | |
| "learning_rate": 7.801749564140724e-05, | |
| "loss": 0.3238774061203003, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "grad_norm": 0.628635585308075, | |
| "learning_rate": 7.743655163741543e-05, | |
| "loss": 0.34537086486816404, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "grad_norm": 0.49822649359703064, | |
| "learning_rate": 7.685025882331936e-05, | |
| "loss": 0.3292637825012207, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "grad_norm": 0.5356727242469788, | |
| "learning_rate": 7.62587314980648e-05, | |
| "loss": 0.32722015380859376, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "grad_norm": 0.6211317777633667, | |
| "learning_rate": 7.566208498107585e-05, | |
| "loss": 0.29880056381225584, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.5336779356002808, | |
| "learning_rate": 7.506043558977321e-05, | |
| "loss": 0.2978524684906006, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.44613513350486755, | |
| "eval_runtime": 19.2382, | |
| "eval_samples_per_second": 20.792, | |
| "eval_steps_per_second": 3.483, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "grad_norm": 0.6681120991706848, | |
| "learning_rate": 7.445390061689782e-05, | |
| "loss": 0.27530927658081056, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "grad_norm": 0.6299528479576111, | |
| "learning_rate": 7.38425983076444e-05, | |
| "loss": 0.2517704486846924, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 4.12, | |
| "grad_norm": 0.5211061239242554, | |
| "learning_rate": 7.32266478366094e-05, | |
| "loss": 0.28200175762176516, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "grad_norm": 0.5778363347053528, | |
| "learning_rate": 7.260616928455754e-05, | |
| "loss": 0.2569046258926392, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 0.6715266108512878, | |
| "learning_rate": 7.1981283615012e-05, | |
| "loss": 0.2665576696395874, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "grad_norm": 0.6580007672309875, | |
| "learning_rate": 7.135211265067216e-05, | |
| "loss": 0.2635650634765625, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "grad_norm": 0.6889304518699646, | |
| "learning_rate": 7.071877904966423e-05, | |
| "loss": 0.26842334270477297, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "grad_norm": 0.5896309018135071, | |
| "learning_rate": 7.00814062816285e-05, | |
| "loss": 0.2633937358856201, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "grad_norm": 0.6062363386154175, | |
| "learning_rate": 6.944011860364905e-05, | |
| "loss": 0.2895397186279297, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 0.6124110817909241, | |
| "learning_rate": 6.879504103602935e-05, | |
| "loss": 0.27405414581298826, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "eval_loss": 0.46795058250427246, | |
| "eval_runtime": 17.2143, | |
| "eval_samples_per_second": 23.237, | |
| "eval_steps_per_second": 3.892, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "grad_norm": 0.8100364208221436, | |
| "learning_rate": 6.814629933791931e-05, | |
| "loss": 0.2581511974334717, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "grad_norm": 0.6187950372695923, | |
| "learning_rate": 6.749401998279846e-05, | |
| "loss": 0.2689012050628662, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "grad_norm": 0.6595885157585144, | |
| "learning_rate": 6.683833013381941e-05, | |
| "loss": 0.27230424880981446, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 4.5600000000000005, | |
| "grad_norm": 0.6320788860321045, | |
| "learning_rate": 6.617935761901748e-05, | |
| "loss": 0.2903036594390869, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 0.6367589831352234, | |
| "learning_rate": 6.551723090639007e-05, | |
| "loss": 0.2551115989685059, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "grad_norm": 0.5754795670509338, | |
| "learning_rate": 6.485207907885175e-05, | |
| "loss": 0.2783109188079834, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "grad_norm": 0.6343188881874084, | |
| "learning_rate": 6.418403180906922e-05, | |
| "loss": 0.29131503105163575, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "grad_norm": 0.6726956963539124, | |
| "learning_rate": 6.351321933418139e-05, | |
| "loss": 0.2730400085449219, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "grad_norm": 0.5498913526535034, | |
| "learning_rate": 6.283977243040939e-05, | |
| "loss": 0.2572148323059082, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 0.6083167195320129, | |
| "learning_rate": 6.216382238756146e-05, | |
| "loss": 0.27444655895233155, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "eval_loss": 0.466619610786438, | |
| "eval_runtime": 19.9505, | |
| "eval_samples_per_second": 20.05, | |
| "eval_steps_per_second": 3.358, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "grad_norm": 0.5861450433731079, | |
| "learning_rate": 6.148550098343778e-05, | |
| "loss": 0.27054529190063475, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "grad_norm": 0.7090939879417419, | |
| "learning_rate": 6.080494045814011e-05, | |
| "loss": 0.26785056591033934, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "grad_norm": 0.5825073719024658, | |
| "learning_rate": 6.0122273488291304e-05, | |
| "loss": 0.26335647106170657, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "grad_norm": 0.5506169199943542, | |
| "learning_rate": 5.943763316116977e-05, | |
| "loss": 0.2614041090011597, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.6169804930686951, | |
| "learning_rate": 5.875115294876381e-05, | |
| "loss": 0.24768717288970948, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "grad_norm": 0.8200834393501282, | |
| "learning_rate": 5.806296668175104e-05, | |
| "loss": 0.21707432270050048, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 5.08, | |
| "grad_norm": 1.5680038928985596, | |
| "learning_rate": 5.737320852340775e-05, | |
| "loss": 0.2139519214630127, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "grad_norm": 0.6845637559890747, | |
| "learning_rate": 5.668201294345363e-05, | |
| "loss": 0.20998594760894776, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 5.16, | |
| "grad_norm": 0.8293268084526062, | |
| "learning_rate": 5.598951469183649e-05, | |
| "loss": 0.23306002616882324, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "grad_norm": 0.7228839993476868, | |
| "learning_rate": 5.52958487724626e-05, | |
| "loss": 0.2262401580810547, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "eval_loss": 0.49972543120384216, | |
| "eval_runtime": 18.926, | |
| "eval_samples_per_second": 21.135, | |
| "eval_steps_per_second": 3.54, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 5.24, | |
| "grad_norm": 0.6243706345558167, | |
| "learning_rate": 5.4601150416877367e-05, | |
| "loss": 0.21100988388061523, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "grad_norm": 1.0553343296051025, | |
| "learning_rate": 5.390555505790168e-05, | |
| "loss": 0.23542592525482178, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 5.32, | |
| "grad_norm": 0.6127402186393738, | |
| "learning_rate": 5.3209198303229027e-05, | |
| "loss": 0.2095633029937744, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 5.36, | |
| "grad_norm": 0.7463288903236389, | |
| "learning_rate": 5.2512215908988484e-05, | |
| "loss": 0.21693904399871827, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 5.4, | |
| "grad_norm": 0.8020226955413818, | |
| "learning_rate": 5.1814743753278795e-05, | |
| "loss": 0.2076347827911377, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "grad_norm": 0.6652446389198303, | |
| "learning_rate": 5.111691780967869e-05, | |
| "loss": 0.22539749145507812, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "grad_norm": 0.6378898620605469, | |
| "learning_rate": 5.041887412073854e-05, | |
| "loss": 0.2077547550201416, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 5.52, | |
| "grad_norm": 0.7381134033203125, | |
| "learning_rate": 4.97207487714586e-05, | |
| "loss": 0.21558783054351807, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 5.5600000000000005, | |
| "grad_norm": 0.6613102555274963, | |
| "learning_rate": 4.9022677862758945e-05, | |
| "loss": 0.21069679260253907, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "grad_norm": 0.7527480721473694, | |
| "learning_rate": 4.832479748494643e-05, | |
| "loss": 0.21843309402465821, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "eval_loss": 0.49576279520988464, | |
| "eval_runtime": 18.3368, | |
| "eval_samples_per_second": 21.814, | |
| "eval_steps_per_second": 3.654, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "grad_norm": 0.5983570218086243, | |
| "learning_rate": 4.7627243691183453e-05, | |
| "loss": 0.22310276031494142, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 5.68, | |
| "grad_norm": 0.6202098727226257, | |
| "learning_rate": 4.693015247096423e-05, | |
| "loss": 0.22056117057800292, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 5.72, | |
| "grad_norm": 0.7730934023857117, | |
| "learning_rate": 4.623365972360337e-05, | |
| "loss": 0.2241537094116211, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "grad_norm": 0.6262892484664917, | |
| "learning_rate": 4.553790123174197e-05, | |
| "loss": 0.21514451503753662, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 5.8, | |
| "grad_norm": 0.646507203578949, | |
| "learning_rate": 4.484301263487665e-05, | |
| "loss": 0.21031346321105956, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "grad_norm": 0.8227706551551819, | |
| "learning_rate": 4.414912940291613e-05, | |
| "loss": 0.2312474489212036, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 5.88, | |
| "grad_norm": 0.6932390332221985, | |
| "learning_rate": 4.345638680977139e-05, | |
| "loss": 0.22380952835083007, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "grad_norm": 0.7352316379547119, | |
| "learning_rate": 4.276491990698355e-05, | |
| "loss": 0.22706894874572753, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "grad_norm": 0.6953718066215515, | |
| "learning_rate": 4.2074863497395377e-05, | |
| "loss": 0.2103546142578125, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.661618709564209, | |
| "learning_rate": 4.1386352108871174e-05, | |
| "loss": 0.2276217222213745, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 0.4966464042663574, | |
| "eval_runtime": 17.2948, | |
| "eval_samples_per_second": 23.128, | |
| "eval_steps_per_second": 3.874, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "grad_norm": 0.8837434649467468, | |
| "learning_rate": 4.069951996807034e-05, | |
| "loss": 0.16540236473083497, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 6.08, | |
| "grad_norm": 1.3857215642929077, | |
| "learning_rate": 4.001450097427966e-05, | |
| "loss": 0.1638352394104004, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "grad_norm": 0.8306711912155151, | |
| "learning_rate": 3.9331428673309204e-05, | |
| "loss": 0.1719011664390564, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 6.16, | |
| "grad_norm": 0.8509021997451782, | |
| "learning_rate": 3.865043623145751e-05, | |
| "loss": 0.1651092290878296, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "grad_norm": 0.7507994174957275, | |
| "learning_rate": 3.797165640955041e-05, | |
| "loss": 0.1746900796890259, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 6.24, | |
| "grad_norm": 0.740626335144043, | |
| "learning_rate": 3.729522153705916e-05, | |
| "loss": 0.16637682914733887, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 6.28, | |
| "grad_norm": 0.6479809880256653, | |
| "learning_rate": 3.662126348630237e-05, | |
| "loss": 0.1709848165512085, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "grad_norm": 0.6932395100593567, | |
| "learning_rate": 3.594991364673745e-05, | |
| "loss": 0.18107957839965821, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "grad_norm": 0.8027141690254211, | |
| "learning_rate": 3.528130289934583e-05, | |
| "loss": 0.16225044727325438, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "grad_norm": 0.5781376957893372, | |
| "learning_rate": 3.461556159111748e-05, | |
| "loss": 0.17544152736663818, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "eval_loss": 0.5342507362365723, | |
| "eval_runtime": 19.471, | |
| "eval_samples_per_second": 20.543, | |
| "eval_steps_per_second": 3.441, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 6.44, | |
| "grad_norm": 0.7642867565155029, | |
| "learning_rate": 3.3952819509639534e-05, | |
| "loss": 0.17091144323349, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "grad_norm": 0.7651257514953613, | |
| "learning_rate": 3.329320585779393e-05, | |
| "loss": 0.17765278816223146, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "grad_norm": 0.6956056356430054, | |
| "learning_rate": 3.263684922856905e-05, | |
| "loss": 0.16475566625595092, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 6.5600000000000005, | |
| "grad_norm": 0.7344402074813843, | |
| "learning_rate": 3.1983877579990274e-05, | |
| "loss": 0.172060227394104, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 6.6, | |
| "grad_norm": 0.7196578979492188, | |
| "learning_rate": 3.1334418210174263e-05, | |
| "loss": 0.16673840284347535, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "grad_norm": 0.7540257573127747, | |
| "learning_rate": 3.0688597732512e-05, | |
| "loss": 0.17414634227752684, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "grad_norm": 0.5103999972343445, | |
| "learning_rate": 3.0046542050985237e-05, | |
| "loss": 0.1620783567428589, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "grad_norm": 0.8846920132637024, | |
| "learning_rate": 2.940837633562127e-05, | |
| "loss": 0.17428462505340575, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "grad_norm": 0.8017328381538391, | |
| "learning_rate": 2.877422499809072e-05, | |
| "loss": 0.19050977230072022, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "grad_norm": 0.8515416383743286, | |
| "learning_rate": 2.8144211667453368e-05, | |
| "loss": 0.16926174163818358, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "eval_loss": 0.5441356301307678, | |
| "eval_runtime": 17.5836, | |
| "eval_samples_per_second": 22.749, | |
| "eval_steps_per_second": 3.81, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "grad_norm": 0.7547643184661865, | |
| "learning_rate": 2.75184591660563e-05, | |
| "loss": 0.1793771743774414, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "grad_norm": 0.7164461016654968, | |
| "learning_rate": 2.6897089485589583e-05, | |
| "loss": 0.1647491931915283, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "grad_norm": 1.1592035293579102, | |
| "learning_rate": 2.6280223763303546e-05, | |
| "loss": 0.17397019863128663, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "grad_norm": 0.9889470934867859, | |
| "learning_rate": 2.5667982258393014e-05, | |
| "loss": 0.17107686996459961, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 0.7448652982711792, | |
| "learning_rate": 2.506048432855247e-05, | |
| "loss": 0.1730511426925659, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "grad_norm": 0.6695497632026672, | |
| "learning_rate": 2.4457848406707013e-05, | |
| "loss": 0.13950222730636597, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "grad_norm": 0.7200675010681152, | |
| "learning_rate": 2.3860191977923672e-05, | |
| "loss": 0.1326605796813965, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "grad_norm": 0.6615055799484253, | |
| "learning_rate": 2.326763155650744e-05, | |
| "loss": 0.1265331983566284, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "grad_norm": 0.8998573422431946, | |
| "learning_rate": 2.2680282663286552e-05, | |
| "loss": 0.12731509208679198, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "grad_norm": 0.808588981628418, | |
| "learning_rate": 2.209825980309151e-05, | |
| "loss": 0.13114826679229735, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "eval_loss": 0.5847110748291016, | |
| "eval_runtime": 18.9921, | |
| "eval_samples_per_second": 21.061, | |
| "eval_steps_per_second": 3.528, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "grad_norm": 0.951817512512207, | |
| "learning_rate": 2.152167644243213e-05, | |
| "loss": 0.12906957864761354, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "grad_norm": 0.8695458173751831, | |
| "learning_rate": 2.095064498737701e-05, | |
| "loss": 0.133590030670166, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "grad_norm": 0.7357354760169983, | |
| "learning_rate": 2.0385276761639765e-05, | |
| "loss": 0.13653848171234131, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 7.36, | |
| "grad_norm": 0.7873698472976685, | |
| "learning_rate": 1.9825681984876172e-05, | |
| "loss": 0.12472724914550781, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 7.4, | |
| "grad_norm": 0.873921811580658, | |
| "learning_rate": 1.9271969751196776e-05, | |
| "loss": 0.13255125284194946, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 7.44, | |
| "grad_norm": 0.7591536045074463, | |
| "learning_rate": 1.8724248007898647e-05, | |
| "loss": 0.13693161010742189, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "grad_norm": 1.0509488582611084, | |
| "learning_rate": 1.8182623534420907e-05, | |
| "loss": 0.13425672054290771, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 7.52, | |
| "grad_norm": 0.8472399711608887, | |
| "learning_rate": 1.76472019215278e-05, | |
| "loss": 0.13668575286865234, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 7.5600000000000005, | |
| "grad_norm": 0.911901593208313, | |
| "learning_rate": 1.7118087550723633e-05, | |
| "loss": 0.1317702889442444, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "grad_norm": 0.9731144309043884, | |
| "learning_rate": 1.659538357390341e-05, | |
| "loss": 0.14458621740341188, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "eval_loss": 0.5830516219139099, | |
| "eval_runtime": 18.7747, | |
| "eval_samples_per_second": 21.305, | |
| "eval_steps_per_second": 3.569, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 7.64, | |
| "grad_norm": 0.5515460968017578, | |
| "learning_rate": 1.60791918932431e-05, | |
| "loss": 0.13126691579818725, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "grad_norm": 0.7286776304244995, | |
| "learning_rate": 1.556961314133359e-05, | |
| "loss": 0.12600460052490234, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "grad_norm": 0.95229572057724, | |
| "learning_rate": 1.5066746661562253e-05, | |
| "loss": 0.12453792095184327, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "grad_norm": 0.7712796330451965, | |
| "learning_rate": 1.4570690488745687e-05, | |
| "loss": 0.14839541912078857, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 7.8, | |
| "grad_norm": 0.8011840581893921, | |
| "learning_rate": 1.4081541330017705e-05, | |
| "loss": 0.1321096420288086, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 7.84, | |
| "grad_norm": 0.936607301235199, | |
| "learning_rate": 1.3599394545975951e-05, | |
| "loss": 0.1317069411277771, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 7.88, | |
| "grad_norm": 0.9034994840621948, | |
| "learning_rate": 1.312434413209131e-05, | |
| "loss": 0.13362932205200195, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "grad_norm": 0.9586318731307983, | |
| "learning_rate": 1.2656482700383237e-05, | |
| "loss": 0.12677763700485228, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 7.96, | |
| "grad_norm": 0.9358674883842468, | |
| "learning_rate": 1.219590146136485e-05, | |
| "loss": 0.1382434129714966, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.8410677313804626, | |
| "learning_rate": 1.1742690206261292e-05, | |
| "loss": 0.12519369125366211, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.5840195417404175, | |
| "eval_runtime": 18.625, | |
| "eval_samples_per_second": 21.477, | |
| "eval_steps_per_second": 3.597, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "grad_norm": 0.6319883465766907, | |
| "learning_rate": 1.129693728950474e-05, | |
| "loss": 0.10409053564071655, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "grad_norm": 0.7751646041870117, | |
| "learning_rate": 1.0858729611509516e-05, | |
| "loss": 0.10310100317001343, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "grad_norm": 0.9277542233467102, | |
| "learning_rate": 1.0428152601730718e-05, | |
| "loss": 0.09960774183273316, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "grad_norm": 0.8381429314613342, | |
| "learning_rate": 1.0005290202009531e-05, | |
| "loss": 0.09982571601867676, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 8.2, | |
| "grad_norm": 0.7726228833198547, | |
| "learning_rate": 9.590224850208646e-06, | |
| "loss": 0.11322143077850341, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 8.24, | |
| "grad_norm": 0.7724836468696594, | |
| "learning_rate": 9.183037464140804e-06, | |
| "loss": 0.10006082057952881, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 8.28, | |
| "grad_norm": 1.0587371587753296, | |
| "learning_rate": 8.783807425793721e-06, | |
| "loss": 0.11560235023498536, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "grad_norm": 0.8337858319282532, | |
| "learning_rate": 8.392612565854375e-06, | |
| "loss": 0.10931503772735596, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "grad_norm": 0.805338978767395, | |
| "learning_rate": 8.009529148535855e-06, | |
| "loss": 0.10900030136108399, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "grad_norm": 0.7612441182136536, | |
| "learning_rate": 7.63463185670939e-06, | |
| "loss": 0.1069128155708313, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "eval_loss": 0.6247864961624146, | |
| "eval_runtime": 18.281, | |
| "eval_samples_per_second": 21.881, | |
| "eval_steps_per_second": 3.665, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 8.44, | |
| "grad_norm": 0.8081948757171631, | |
| "learning_rate": 7.267993777344856e-06, | |
| "loss": 0.09856721758842468, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "grad_norm": 0.7861329913139343, | |
| "learning_rate": 6.909686387262254e-06, | |
| "loss": 0.10609345436096192, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 8.52, | |
| "grad_norm": 0.7145861387252808, | |
| "learning_rate": 6.559779539197231e-06, | |
| "loss": 0.105103600025177, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 8.56, | |
| "grad_norm": 0.7359808683395386, | |
| "learning_rate": 6.21834144818314e-06, | |
| "loss": 0.10853493213653564, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "grad_norm": 0.8519245982170105, | |
| "learning_rate": 5.885438678252342e-06, | |
| "loss": 0.11464111804962158, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 8.64, | |
| "grad_norm": 0.8307661414146423, | |
| "learning_rate": 5.5611361294594325e-06, | |
| "loss": 0.10765299797058106, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 8.68, | |
| "grad_norm": 0.8340169787406921, | |
| "learning_rate": 5.245497025228874e-06, | |
| "loss": 0.10699164867401123, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 8.72, | |
| "grad_norm": 0.7895165085792542, | |
| "learning_rate": 4.938582900029437e-06, | |
| "loss": 0.10728691816329956, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 8.76, | |
| "grad_norm": 0.7967789769172668, | |
| "learning_rate": 4.640453587377957e-06, | |
| "loss": 0.11177785396575927, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "grad_norm": 0.8613453507423401, | |
| "learning_rate": 4.351167208174639e-06, | |
| "loss": 0.11041848659515381, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "eval_loss": 0.6235533356666565, | |
| "eval_runtime": 19.0901, | |
| "eval_samples_per_second": 20.953, | |
| "eval_steps_per_second": 3.51, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 8.84, | |
| "grad_norm": 0.6587359309196472, | |
| "learning_rate": 4.0707801593723e-06, | |
| "loss": 0.1085782766342163, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "grad_norm": 0.7126621603965759, | |
| "learning_rate": 3.799347102981665e-06, | |
| "loss": 0.11138873100280762, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "grad_norm": 0.7560760974884033, | |
| "learning_rate": 3.536920955414885e-06, | |
| "loss": 0.10770895481109619, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 8.96, | |
| "grad_norm": 0.95421302318573, | |
| "learning_rate": 3.2835528771693992e-06, | |
| "loss": 0.11167995929718018, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 0.9774760007858276, | |
| "learning_rate": 3.039292262854088e-06, | |
| "loss": 0.11738998889923095, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "grad_norm": 0.7680178880691528, | |
| "learning_rate": 2.804186731559677e-06, | |
| "loss": 0.10072145462036133, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 9.08, | |
| "grad_norm": 0.8222008943557739, | |
| "learning_rate": 2.5782821175753422e-06, | |
| "loss": 0.09228388667106628, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "grad_norm": 0.8610215783119202, | |
| "learning_rate": 2.361622461453178e-06, | |
| "loss": 0.09626876711845397, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 9.16, | |
| "grad_norm": 0.7807718515396118, | |
| "learning_rate": 2.154250001422431e-06, | |
| "loss": 0.0960278868675232, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "grad_norm": 0.8036084175109863, | |
| "learning_rate": 1.956205165155078e-06, | |
| "loss": 0.0941778838634491, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "eval_loss": 0.6419874429702759, | |
| "eval_runtime": 19.9334, | |
| "eval_samples_per_second": 20.067, | |
| "eval_steps_per_second": 3.361, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "grad_norm": 0.7480472326278687, | |
| "learning_rate": 1.7675265618843362e-06, | |
| "loss": 0.09725146293640137, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 9.28, | |
| "grad_norm": 0.8559448719024658, | |
| "learning_rate": 1.5882509748777808e-06, | |
| "loss": 0.09353782534599304, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 9.32, | |
| "grad_norm": 0.6416171193122864, | |
| "learning_rate": 1.4184133542663014e-06, | |
| "loss": 0.09848537445068359, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "grad_norm": 0.7388947606086731, | |
| "learning_rate": 1.258046810230562e-06, | |
| "loss": 0.10164464712142944, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "grad_norm": 0.8187626600265503, | |
| "learning_rate": 1.1071826065460588e-06, | |
| "loss": 0.0934177041053772, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "grad_norm": 0.865635871887207, | |
| "learning_rate": 9.65850154488218e-07, | |
| "loss": 0.1012031078338623, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 9.48, | |
| "grad_norm": 0.8829763531684875, | |
| "learning_rate": 8.340770070986214e-07, | |
| "loss": 0.09371918439865112, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "grad_norm": 0.7734853625297546, | |
| "learning_rate": 7.11888853813436e-07, | |
| "loss": 0.09450345039367676, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 9.56, | |
| "grad_norm": 0.7692961096763611, | |
| "learning_rate": 5.993095154552431e-07, | |
| "loss": 0.09499152898788452, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "grad_norm": 1.1678398847579956, | |
| "learning_rate": 4.963609395891299e-07, | |
| "loss": 0.10716021060943604, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "eval_loss": 0.6402375102043152, | |
| "eval_runtime": 18.9858, | |
| "eval_samples_per_second": 21.068, | |
| "eval_steps_per_second": 3.529, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 9.64, | |
| "grad_norm": 0.7258604764938354, | |
| "learning_rate": 4.030631962439302e-07, | |
| "loss": 0.09596163630485535, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "grad_norm": 0.8662357330322266, | |
| "learning_rate": 3.1943447399958027e-07, | |
| "loss": 0.09645589590072631, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "grad_norm": 0.8258174061775208, | |
| "learning_rate": 2.4549107644117885e-07, | |
| "loss": 0.09415926933288574, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 9.76, | |
| "grad_norm": 0.911540150642395, | |
| "learning_rate": 1.8124741898058462e-07, | |
| "loss": 0.10026730298995971, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 9.8, | |
| "grad_norm": 0.8336577415466309, | |
| "learning_rate": 1.267160260461253e-07, | |
| "loss": 0.09711679220199584, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 9.84, | |
| "grad_norm": 0.7324675917625427, | |
| "learning_rate": 8.190752864088436e-08, | |
| "loss": 0.09345818758010864, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 9.88, | |
| "grad_norm": 0.9261553287506104, | |
| "learning_rate": 4.683066227023081e-08, | |
| "loss": 0.102751624584198, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 9.92, | |
| "grad_norm": 0.9403973817825317, | |
| "learning_rate": 2.1492265238748366e-08, | |
| "loss": 0.0988599717617035, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "grad_norm": 0.7062044739723206, | |
| "learning_rate": 5.897277317157279e-09, | |
| "loss": 0.09828301668167114, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.7819132804870605, | |
| "learning_rate": 4.873877924582715e-11, | |
| "loss": 0.0937616467475891, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 0.6409608721733093, | |
| "eval_runtime": 17.8761, | |
| "eval_samples_per_second": 22.376, | |
| "eval_steps_per_second": 3.748, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 2500, | |
| "total_flos": 3.634151342457697e+19, | |
| "train_loss": 0.2690703985452652, | |
| "train_runtime": 10014.7733, | |
| "train_samples_per_second": 5.991, | |
| "train_steps_per_second": 0.25 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.634151342457697e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |