Instructions to use wonwonn/diversity_base_adapter with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use wonwonn/diversity_base_adapter with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
model = PeftModel.from_pretrained(base_model, "wonwonn/diversity_base_adapter")
```
- Transformers
How to use wonwonn/diversity_base_adapter with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="wonwonn/diversity_base_adapter")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("wonwonn/diversity_base_adapter", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use wonwonn/diversity_base_adapter with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "wonwonn/diversity_base_adapter"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "wonwonn/diversity_base_adapter",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/wonwonn/diversity_base_adapter
- SGLang
How to use wonwonn/diversity_base_adapter with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "wonwonn/diversity_base_adapter" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "wonwonn/diversity_base_adapter",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "wonwonn/diversity_base_adapter" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "wonwonn/diversity_base_adapter",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use wonwonn/diversity_base_adapter with Docker Model Runner:
docker model run hf.co/wonwonn/diversity_base_adapter
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 696, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0028776978417266188, | |
| "grad_norm": 0.42690583075609034, | |
| "learning_rate": 0.0, | |
| "loss": 1.3486042022705078, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0057553956834532375, | |
| "grad_norm": 0.3882655966887319, | |
| "learning_rate": 5.714285714285714e-08, | |
| "loss": 1.2684719562530518, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.008633093525179856, | |
| "grad_norm": 0.4443954365616111, | |
| "learning_rate": 1.1428571428571427e-07, | |
| "loss": 1.177267074584961, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.011510791366906475, | |
| "grad_norm": 0.4253698770879055, | |
| "learning_rate": 1.7142857142857143e-07, | |
| "loss": 1.181878924369812, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.014388489208633094, | |
| "grad_norm": 0.3720421036789158, | |
| "learning_rate": 2.2857142857142855e-07, | |
| "loss": 1.1671853065490723, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.017266187050359712, | |
| "grad_norm": 0.3622614120618611, | |
| "learning_rate": 2.857142857142857e-07, | |
| "loss": 1.1122022867202759, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.02014388489208633, | |
| "grad_norm": 0.38384210342955205, | |
| "learning_rate": 3.4285714285714286e-07, | |
| "loss": 1.1873408555984497, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.02302158273381295, | |
| "grad_norm": 0.4284045629754763, | |
| "learning_rate": 4e-07, | |
| "loss": 1.1845028400421143, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.025899280575539568, | |
| "grad_norm": 0.41551879217805887, | |
| "learning_rate": 4.571428571428571e-07, | |
| "loss": 1.2185403108596802, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.02877697841726619, | |
| "grad_norm": 0.42087035069001, | |
| "learning_rate": 5.142857142857142e-07, | |
| "loss": 1.0747895240783691, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.031654676258992806, | |
| "grad_norm": 0.5150810858798297, | |
| "learning_rate": 5.714285714285714e-07, | |
| "loss": 1.2050367593765259, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.034532374100719423, | |
| "grad_norm": 0.3402347213407099, | |
| "learning_rate": 6.285714285714286e-07, | |
| "loss": 1.1960644721984863, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.03741007194244604, | |
| "grad_norm": 0.3833689066105734, | |
| "learning_rate": 6.857142857142857e-07, | |
| "loss": 1.2497148513793945, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.04028776978417266, | |
| "grad_norm": 0.3335104915047139, | |
| "learning_rate": 7.428571428571429e-07, | |
| "loss": 1.1446340084075928, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.04316546762589928, | |
| "grad_norm": 0.3492060423539416, | |
| "learning_rate": 8e-07, | |
| "loss": 1.1868774890899658, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0460431654676259, | |
| "grad_norm": 0.36339916703647873, | |
| "learning_rate": 8.57142857142857e-07, | |
| "loss": 1.1652871370315552, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.04892086330935252, | |
| "grad_norm": 0.35128380927769104, | |
| "learning_rate": 9.142857142857142e-07, | |
| "loss": 1.1377315521240234, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.051798561151079135, | |
| "grad_norm": 0.3216270031913542, | |
| "learning_rate": 9.714285714285715e-07, | |
| "loss": 1.179404377937317, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.05467625899280575, | |
| "grad_norm": 0.3626006607419513, | |
| "learning_rate": 1.0285714285714284e-06, | |
| "loss": 1.272096872329712, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.05755395683453238, | |
| "grad_norm": 0.37548463438614677, | |
| "learning_rate": 1.0857142857142856e-06, | |
| "loss": 1.1252775192260742, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.060431654676258995, | |
| "grad_norm": 0.39203682362934145, | |
| "learning_rate": 1.1428571428571428e-06, | |
| "loss": 1.2636396884918213, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.06330935251798561, | |
| "grad_norm": 0.3929267980473854, | |
| "learning_rate": 1.2e-06, | |
| "loss": 1.1296113729476929, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.06618705035971223, | |
| "grad_norm": 0.3580571203740857, | |
| "learning_rate": 1.2571428571428571e-06, | |
| "loss": 1.2140036821365356, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.06906474820143885, | |
| "grad_norm": 0.40128457938538337, | |
| "learning_rate": 1.3142857142857143e-06, | |
| "loss": 1.3345097303390503, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.07194244604316546, | |
| "grad_norm": 0.3624963705827193, | |
| "learning_rate": 1.3714285714285715e-06, | |
| "loss": 1.2285950183868408, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.07482014388489208, | |
| "grad_norm": 0.3891545493397791, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 1.1885042190551758, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.0776978417266187, | |
| "grad_norm": 0.45890158291879024, | |
| "learning_rate": 1.4857142857142858e-06, | |
| "loss": 1.1534702777862549, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.08057553956834532, | |
| "grad_norm": 0.4134301257295623, | |
| "learning_rate": 1.5428571428571428e-06, | |
| "loss": 1.1666285991668701, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.08345323741007195, | |
| "grad_norm": 0.42695898719570075, | |
| "learning_rate": 1.6e-06, | |
| "loss": 1.0518786907196045, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.08633093525179857, | |
| "grad_norm": 0.40529199232299007, | |
| "learning_rate": 1.657142857142857e-06, | |
| "loss": 1.2913450002670288, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.08920863309352518, | |
| "grad_norm": 0.42614405561233504, | |
| "learning_rate": 1.714285714285714e-06, | |
| "loss": 1.1956894397735596, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.0920863309352518, | |
| "grad_norm": 0.4536901431733599, | |
| "learning_rate": 1.7714285714285712e-06, | |
| "loss": 1.2299771308898926, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.09496402877697842, | |
| "grad_norm": 0.4275911040935052, | |
| "learning_rate": 1.8285714285714284e-06, | |
| "loss": 1.230122685432434, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.09784172661870504, | |
| "grad_norm": 0.40907326990128035, | |
| "learning_rate": 1.8857142857142856e-06, | |
| "loss": 1.2399665117263794, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.10071942446043165, | |
| "grad_norm": 0.42873759553168767, | |
| "learning_rate": 1.942857142857143e-06, | |
| "loss": 1.209876298904419, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.10359712230215827, | |
| "grad_norm": 0.3875078895858393, | |
| "learning_rate": 2e-06, | |
| "loss": 1.3261746168136597, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.10647482014388489, | |
| "grad_norm": 0.3895695917104401, | |
| "learning_rate": 1.999988705525916e-06, | |
| "loss": 1.1430740356445312, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.1093525179856115, | |
| "grad_norm": 0.390575759377815, | |
| "learning_rate": 1.9999548223587944e-06, | |
| "loss": 1.0920931100845337, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.11223021582733812, | |
| "grad_norm": 0.4547783385877052, | |
| "learning_rate": 1.9998983512640208e-06, | |
| "loss": 1.1944105625152588, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.11510791366906475, | |
| "grad_norm": 0.46472429487773786, | |
| "learning_rate": 1.9998192935172177e-06, | |
| "loss": 1.2673561573028564, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.11798561151079137, | |
| "grad_norm": 0.41362852460476074, | |
| "learning_rate": 1.9997176509042157e-06, | |
| "loss": 1.2279549837112427, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.12086330935251799, | |
| "grad_norm": 0.4271272595194793, | |
| "learning_rate": 1.9995934257210153e-06, | |
| "loss": 1.1604218482971191, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.1237410071942446, | |
| "grad_norm": 0.3804983247156394, | |
| "learning_rate": 1.9994466207737324e-06, | |
| "loss": 1.1747047901153564, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.12661870503597122, | |
| "grad_norm": 0.3650820732490411, | |
| "learning_rate": 1.9992772393785363e-06, | |
| "loss": 1.0480847358703613, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.12949640287769784, | |
| "grad_norm": 0.41209897515894023, | |
| "learning_rate": 1.9990852853615746e-06, | |
| "loss": 1.2965943813323975, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.13237410071942446, | |
| "grad_norm": 0.4043694740064971, | |
| "learning_rate": 1.9988707630588874e-06, | |
| "loss": 1.1381937265396118, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.13525179856115108, | |
| "grad_norm": 0.4147421237580474, | |
| "learning_rate": 1.9986336773163066e-06, | |
| "loss": 1.1976345777511597, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.1381294964028777, | |
| "grad_norm": 0.46096126219291444, | |
| "learning_rate": 1.99837403348935e-06, | |
| "loss": 1.1909739971160889, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1410071942446043, | |
| "grad_norm": 0.48554729123892804, | |
| "learning_rate": 1.9980918374430994e-06, | |
| "loss": 1.1516118049621582, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.14388489208633093, | |
| "grad_norm": 0.46308911997767715, | |
| "learning_rate": 1.997787095552066e-06, | |
| "loss": 1.2086803913116455, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.14676258992805755, | |
| "grad_norm": 0.4490535916599434, | |
| "learning_rate": 1.9974598147000487e-06, | |
| "loss": 1.2800133228302002, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.14964028776978416, | |
| "grad_norm": 0.40996217705477556, | |
| "learning_rate": 1.997110002279978e-06, | |
| "loss": 1.2382150888442993, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.15251798561151078, | |
| "grad_norm": 0.40988669582643505, | |
| "learning_rate": 1.9967376661937477e-06, | |
| "loss": 1.1741186380386353, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.1553956834532374, | |
| "grad_norm": 0.5464526851837473, | |
| "learning_rate": 1.9963428148520393e-06, | |
| "loss": 1.1607799530029297, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.15827338129496402, | |
| "grad_norm": 0.42016622274268145, | |
| "learning_rate": 1.9959254571741285e-06, | |
| "loss": 1.2755463123321533, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.16115107913669063, | |
| "grad_norm": 0.4490415553167208, | |
| "learning_rate": 1.995485602587687e-06, | |
| "loss": 1.261953592300415, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.16402877697841728, | |
| "grad_norm": 0.5169029226242617, | |
| "learning_rate": 1.995023261028567e-06, | |
| "loss": 1.1530394554138184, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.1669064748201439, | |
| "grad_norm": 0.43016608573228415, | |
| "learning_rate": 1.9945384429405776e-06, | |
| "loss": 1.268787145614624, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.1697841726618705, | |
| "grad_norm": 0.4793080238946335, | |
| "learning_rate": 1.99403115927525e-06, | |
| "loss": 1.2686214447021484, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.17266187050359713, | |
| "grad_norm": 0.4249978192550724, | |
| "learning_rate": 1.9935014214915883e-06, | |
| "loss": 1.201757550239563, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.17553956834532375, | |
| "grad_norm": 0.4481671623233787, | |
| "learning_rate": 1.992949241555812e-06, | |
| "loss": 1.1886329650878906, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.17841726618705037, | |
| "grad_norm": 0.5402187081810303, | |
| "learning_rate": 1.9923746319410847e-06, | |
| "loss": 1.2228707075119019, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.18129496402877698, | |
| "grad_norm": 0.4000631018631766, | |
| "learning_rate": 1.991777605627234e-06, | |
| "loss": 1.0736989974975586, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.1841726618705036, | |
| "grad_norm": 0.4881119026873745, | |
| "learning_rate": 1.9911581761004556e-06, | |
| "loss": 1.213085651397705, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.18705035971223022, | |
| "grad_norm": 0.5274580867703768, | |
| "learning_rate": 1.990516357353011e-06, | |
| "loss": 1.2776455879211426, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.18992805755395684, | |
| "grad_norm": 0.5316412618386857, | |
| "learning_rate": 1.989852163882911e-06, | |
| "loss": 1.1717431545257568, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.19280575539568345, | |
| "grad_norm": 0.5326686824141037, | |
| "learning_rate": 1.9891656106935873e-06, | |
| "loss": 1.1462079286575317, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.19568345323741007, | |
| "grad_norm": 0.4445628555318413, | |
| "learning_rate": 1.988456713293554e-06, | |
| "loss": 1.174164056777954, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.1985611510791367, | |
| "grad_norm": 0.5068823690157335, | |
| "learning_rate": 1.987725487696059e-06, | |
| "loss": 1.3018139600753784, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.2014388489208633, | |
| "grad_norm": 0.5106162613433823, | |
| "learning_rate": 1.9869719504187175e-06, | |
| "loss": 1.273469090461731, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.20431654676258992, | |
| "grad_norm": 0.46468998684527285, | |
| "learning_rate": 1.9861961184831453e-06, | |
| "loss": 1.2473914623260498, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.20719424460431654, | |
| "grad_norm": 0.5345828232737263, | |
| "learning_rate": 1.9853980094145696e-06, | |
| "loss": 1.193030834197998, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.21007194244604316, | |
| "grad_norm": 0.5271717020423939, | |
| "learning_rate": 1.9845776412414346e-06, | |
| "loss": 1.1826913356781006, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.21294964028776978, | |
| "grad_norm": 0.4004103214424577, | |
| "learning_rate": 1.9837350324949944e-06, | |
| "loss": 1.055051565170288, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.2158273381294964, | |
| "grad_norm": 0.5075363846617762, | |
| "learning_rate": 1.9828702022088942e-06, | |
| "loss": 1.1969430446624756, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.218705035971223, | |
| "grad_norm": 0.5116674728159791, | |
| "learning_rate": 1.9819831699187407e-06, | |
| "loss": 1.2737852334976196, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.22158273381294963, | |
| "grad_norm": 0.5134518143732013, | |
| "learning_rate": 1.9810739556616607e-06, | |
| "loss": 1.1505439281463623, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.22446043165467625, | |
| "grad_norm": 0.4961762001577513, | |
| "learning_rate": 1.980142579975847e-06, | |
| "loss": 1.1265602111816406, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.2273381294964029, | |
| "grad_norm": 0.5222448272100187, | |
| "learning_rate": 1.9791890639000973e-06, | |
| "loss": 1.1243963241577148, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.2302158273381295, | |
| "grad_norm": 0.500186205073849, | |
| "learning_rate": 1.9782134289733374e-06, | |
| "loss": 1.2614185810089111, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.23309352517985613, | |
| "grad_norm": 0.49912691652286095, | |
| "learning_rate": 1.9772156972341326e-06, | |
| "loss": 1.1954736709594727, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.23597122302158274, | |
| "grad_norm": 0.4383210281801482, | |
| "learning_rate": 1.9761958912201945e-06, | |
| "loss": 1.125051736831665, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.23884892086330936, | |
| "grad_norm": 0.4960615716168437, | |
| "learning_rate": 1.9751540339678683e-06, | |
| "loss": 1.0172779560089111, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.24172661870503598, | |
| "grad_norm": 0.6116032459432448, | |
| "learning_rate": 1.9740901490116133e-06, | |
| "loss": 1.1732102632522583, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.2446043165467626, | |
| "grad_norm": 0.5176440293767078, | |
| "learning_rate": 1.973004260383471e-06, | |
| "loss": 1.225417137145996, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.2474820143884892, | |
| "grad_norm": 0.5678048576376955, | |
| "learning_rate": 1.9718963926125244e-06, | |
| "loss": 1.0927081108093262, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.2503597122302158, | |
| "grad_norm": 0.6364015437310658, | |
| "learning_rate": 1.9707665707243406e-06, | |
| "loss": 1.2957037687301636, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.25323741007194245, | |
| "grad_norm": 0.5809276103827633, | |
| "learning_rate": 1.969614820240407e-06, | |
| "loss": 1.187430500984192, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.25611510791366904, | |
| "grad_norm": 0.47690557886764195, | |
| "learning_rate": 1.9684411671775568e-06, | |
| "loss": 1.1036494970321655, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.2589928057553957, | |
| "grad_norm": 0.6365313959891131, | |
| "learning_rate": 1.967245638047378e-06, | |
| "loss": 1.1274656057357788, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.26187050359712233, | |
| "grad_norm": 0.542056873386327, | |
| "learning_rate": 1.9660282598556155e-06, | |
| "loss": 1.1317627429962158, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.2647482014388489, | |
| "grad_norm": 0.5772496326462602, | |
| "learning_rate": 1.964789060101563e-06, | |
| "loss": 1.1629116535186768, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.26762589928057556, | |
| "grad_norm": 0.5006687207127247, | |
| "learning_rate": 1.9635280667774385e-06, | |
| "loss": 1.0691213607788086, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.27050359712230215, | |
| "grad_norm": 0.4948922663102226, | |
| "learning_rate": 1.9622453083677555e-06, | |
| "loss": 1.0724678039550781, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.2733812949640288, | |
| "grad_norm": 0.6310621741039645, | |
| "learning_rate": 1.9609408138486773e-06, | |
| "loss": 1.1892088651657104, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.2762589928057554, | |
| "grad_norm": 0.5720157291662107, | |
| "learning_rate": 1.959614612687363e-06, | |
| "loss": 1.1208692789077759, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.27913669064748203, | |
| "grad_norm": 0.4968629674990631, | |
| "learning_rate": 1.9582667348413013e-06, | |
| "loss": 1.1870933771133423, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.2820143884892086, | |
| "grad_norm": 0.562208720820431, | |
| "learning_rate": 1.9568972107576355e-06, | |
| "loss": 1.1234577894210815, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.28489208633093527, | |
| "grad_norm": 0.5269154276860062, | |
| "learning_rate": 1.9555060713724737e-06, | |
| "loss": 1.0910080671310425, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.28776978417266186, | |
| "grad_norm": 0.5740033705130164, | |
| "learning_rate": 1.9540933481101923e-06, | |
| "loss": 1.1712496280670166, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2906474820143885, | |
| "grad_norm": 0.5087584410520719, | |
| "learning_rate": 1.952659072882723e-06, | |
| "loss": 1.1709492206573486, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.2935251798561151, | |
| "grad_norm": 0.507763542956114, | |
| "learning_rate": 1.9512032780888346e-06, | |
| "loss": 1.2015647888183594, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.29640287769784174, | |
| "grad_norm": 0.46170542788719804, | |
| "learning_rate": 1.9497259966134005e-06, | |
| "loss": 1.100395679473877, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.2992805755395683, | |
| "grad_norm": 0.5868783299117551, | |
| "learning_rate": 1.9482272618266554e-06, | |
| "loss": 1.1746639013290405, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.302158273381295, | |
| "grad_norm": 0.5697876288298696, | |
| "learning_rate": 1.946707107583442e-06, | |
| "loss": 1.106672763824463, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.30503597122302156, | |
| "grad_norm": 0.569742102482636, | |
| "learning_rate": 1.945165568222445e-06, | |
| "loss": 1.213707685470581, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.3079136690647482, | |
| "grad_norm": 0.538364984750885, | |
| "learning_rate": 1.9436026785654175e-06, | |
| "loss": 1.0930910110473633, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.3107913669064748, | |
| "grad_norm": 0.5312913632919434, | |
| "learning_rate": 1.942018473916393e-06, | |
| "loss": 1.142619252204895, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.31366906474820144, | |
| "grad_norm": 0.5975458242690872, | |
| "learning_rate": 1.940412990060888e-06, | |
| "loss": 1.2266335487365723, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.31654676258992803, | |
| "grad_norm": 0.46671843811650277, | |
| "learning_rate": 1.9387862632650944e-06, | |
| "loss": 1.1608915328979492, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.3194244604316547, | |
| "grad_norm": 0.5834160016362078, | |
| "learning_rate": 1.937138330275059e-06, | |
| "loss": 1.183951735496521, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.32230215827338127, | |
| "grad_norm": 0.5226121969782161, | |
| "learning_rate": 1.9354692283158553e-06, | |
| "loss": 1.1224737167358398, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.3251798561151079, | |
| "grad_norm": 0.5495555876202739, | |
| "learning_rate": 1.9337789950907407e-06, | |
| "loss": 1.0857056379318237, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.32805755395683456, | |
| "grad_norm": 0.4562188941273161, | |
| "learning_rate": 1.9320676687803055e-06, | |
| "loss": 1.0629336833953857, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.33093525179856115, | |
| "grad_norm": 0.4181080129706698, | |
| "learning_rate": 1.930335288041612e-06, | |
| "loss": 1.1054446697235107, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.3338129496402878, | |
| "grad_norm": 0.4892645824645763, | |
| "learning_rate": 1.928581892007318e-06, | |
| "loss": 1.0204641819000244, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.3366906474820144, | |
| "grad_norm": 0.42126704545419896, | |
| "learning_rate": 1.926807520284796e-06, | |
| "loss": 1.0821490287780762, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.339568345323741, | |
| "grad_norm": 0.5344372210987457, | |
| "learning_rate": 1.9250122129552364e-06, | |
| "loss": 1.1084657907485962, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.3424460431654676, | |
| "grad_norm": 0.502255473105476, | |
| "learning_rate": 1.923196010572744e-06, | |
| "loss": 1.1403509378433228, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.34532374100719426, | |
| "grad_norm": 0.4478418679539473, | |
| "learning_rate": 1.92135895416342e-06, | |
| "loss": 1.1791510581970215, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.34820143884892085, | |
| "grad_norm": 0.5040410026582576, | |
| "learning_rate": 1.9195010852244366e-06, | |
| "loss": 1.1240849494934082, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.3510791366906475, | |
| "grad_norm": 0.3694753679987858, | |
| "learning_rate": 1.917622445723099e-06, | |
| "loss": 0.9320825338363647, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.3539568345323741, | |
| "grad_norm": 0.5088967091444829, | |
| "learning_rate": 1.9157230780958975e-06, | |
| "loss": 1.1261234283447266, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.35683453237410073, | |
| "grad_norm": 0.48547365885933336, | |
| "learning_rate": 1.9138030252475484e-06, | |
| "loss": 1.0831753015518188, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.3597122302158273, | |
| "grad_norm": 0.5680540539558109, | |
| "learning_rate": 1.911862330550027e-06, | |
| "loss": 1.1547625064849854, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.36258992805755397, | |
| "grad_norm": 0.482809160276131, | |
| "learning_rate": 1.9099010378415844e-06, | |
| "loss": 0.9853061437606812, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.36546762589928056, | |
| "grad_norm": 0.5999988506556185, | |
| "learning_rate": 1.90791919142576e-06, | |
| "loss": 1.0587449073791504, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.3683453237410072, | |
| "grad_norm": 0.4786126684230341, | |
| "learning_rate": 1.9059168360703803e-06, | |
| "loss": 1.0581047534942627, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.3712230215827338, | |
| "grad_norm": 0.4721916332432008, | |
| "learning_rate": 1.9038940170065466e-06, | |
| "loss": 1.2065491676330566, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.37410071942446044, | |
| "grad_norm": 0.43066899683927695, | |
| "learning_rate": 1.9018507799276131e-06, | |
| "loss": 0.9673759937286377, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.376978417266187, | |
| "grad_norm": 0.4819631279931072, | |
| "learning_rate": 1.8997871709881567e-06, | |
| "loss": 1.055248498916626, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.37985611510791367, | |
| "grad_norm": 0.49209482786786624, | |
| "learning_rate": 1.8977032368029332e-06, | |
| "loss": 1.0030591487884521, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.38273381294964026, | |
| "grad_norm": 0.5018665575618141, | |
| "learning_rate": 1.8955990244458233e-06, | |
| "loss": 1.1191744804382324, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.3856115107913669, | |
| "grad_norm": 0.5576523584422169, | |
| "learning_rate": 1.8934745814487712e-06, | |
| "loss": 1.0733585357666016, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.38848920863309355, | |
| "grad_norm": 0.5008878898473639, | |
| "learning_rate": 1.8913299558007095e-06, | |
| "loss": 1.1800191402435303, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.39136690647482014, | |
| "grad_norm": 0.43511113369960597, | |
| "learning_rate": 1.8891651959464758e-06, | |
| "loss": 1.027364730834961, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3942446043165468, | |
| "grad_norm": 0.4765093745936347, | |
| "learning_rate": 1.8869803507857185e-06, | |
| "loss": 1.107445478439331, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.3971223021582734, | |
| "grad_norm": 0.46129319329450635, | |
| "learning_rate": 1.884775469671791e-06, | |
| "loss": 1.187384009361267, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.48437113838726986, | |
| "learning_rate": 1.8825506024106396e-06, | |
| "loss": 1.0362842082977295, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.4028776978417266, | |
| "grad_norm": 0.4645234599714407, | |
| "learning_rate": 1.8803057992596747e-06, | |
| "loss": 1.0802561044692993, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.40575539568345326, | |
| "grad_norm": 0.4280947125747255, | |
| "learning_rate": 1.8780411109266385e-06, | |
| "loss": 1.1036925315856934, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.40863309352517985, | |
| "grad_norm": 0.407505252457033, | |
| "learning_rate": 1.8757565885684584e-06, | |
| "loss": 1.0300638675689697, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.4115107913669065, | |
| "grad_norm": 0.3623983611321653, | |
| "learning_rate": 1.8734522837900915e-06, | |
| "loss": 0.982805609703064, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.4143884892086331, | |
| "grad_norm": 0.4850168244727846, | |
| "learning_rate": 1.8711282486433594e-06, | |
| "loss": 1.1880314350128174, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.4172661870503597, | |
| "grad_norm": 0.39657849815671453, | |
| "learning_rate": 1.8687845356257705e-06, | |
| "loss": 1.001549482345581, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.4201438848920863, | |
| "grad_norm": 0.43575502402332317, | |
| "learning_rate": 1.866421197679338e-06, | |
| "loss": 1.1122441291809082, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.42302158273381296, | |
| "grad_norm": 0.42214821227567706, | |
| "learning_rate": 1.8640382881893797e-06, | |
| "loss": 0.9325125217437744, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.42589928057553955, | |
| "grad_norm": 0.49793043845219986, | |
| "learning_rate": 1.8616358609833144e-06, | |
| "loss": 1.1867802143096924, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.4287769784172662, | |
| "grad_norm": 0.5003465171318868, | |
| "learning_rate": 1.8592139703294456e-06, | |
| "loss": 1.1244676113128662, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.4316546762589928, | |
| "grad_norm": 0.4321152662969621, | |
| "learning_rate": 1.8567726709357365e-06, | |
| "loss": 1.0435458421707153, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.43453237410071943, | |
| "grad_norm": 0.42883319369137934, | |
| "learning_rate": 1.854312017948572e-06, | |
| "loss": 0.9999338388442993, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.437410071942446, | |
| "grad_norm": 0.41457898959091355, | |
| "learning_rate": 1.8518320669515145e-06, | |
| "loss": 1.0550625324249268, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.44028776978417267, | |
| "grad_norm": 0.37601633308325455, | |
| "learning_rate": 1.8493328739640494e-06, | |
| "loss": 1.1828843355178833, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.44316546762589926, | |
| "grad_norm": 0.48403246563497276, | |
| "learning_rate": 1.8468144954403174e-06, | |
| "loss": 1.0219019651412964, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.4460431654676259, | |
| "grad_norm": 0.4080458449115876, | |
| "learning_rate": 1.8442769882678397e-06, | |
| "loss": 1.064319372177124, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.4489208633093525, | |
| "grad_norm": 0.4307991968666485, | |
| "learning_rate": 1.8417204097662348e-06, | |
| "loss": 0.9629073143005371, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.45179856115107914, | |
| "grad_norm": 0.42498698874553337, | |
| "learning_rate": 1.8391448176859221e-06, | |
| "loss": 0.9967993497848511, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.4546762589928058, | |
| "grad_norm": 0.3864381339362373, | |
| "learning_rate": 1.8365502702068176e-06, | |
| "loss": 0.9952638149261475, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.45755395683453237, | |
| "grad_norm": 0.40273255363591914, | |
| "learning_rate": 1.8339368259370196e-06, | |
| "loss": 1.1115927696228027, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.460431654676259, | |
| "grad_norm": 0.41064614803619237, | |
| "learning_rate": 1.8313045439114854e-06, | |
| "loss": 1.0840253829956055, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4633093525179856, | |
| "grad_norm": 0.5037888020430983, | |
| "learning_rate": 1.8286534835906967e-06, | |
| "loss": 0.9446510076522827, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.46618705035971225, | |
| "grad_norm": 0.5305475182215801, | |
| "learning_rate": 1.8259837048593187e-06, | |
| "loss": 1.074650526046753, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.46906474820143884, | |
| "grad_norm": 0.4475049031892367, | |
| "learning_rate": 1.8232952680248439e-06, | |
| "loss": 1.1149487495422363, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.4719424460431655, | |
| "grad_norm": 0.36490672571189003, | |
| "learning_rate": 1.8205882338162333e-06, | |
| "loss": 1.0125229358673096, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.4748201438848921, | |
| "grad_norm": 0.41629528136632005, | |
| "learning_rate": 1.8178626633825417e-06, | |
| "loss": 1.079350471496582, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.4776978417266187, | |
| "grad_norm": 0.49379487126662264, | |
| "learning_rate": 1.8151186182915383e-06, | |
| "loss": 1.057182788848877, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.4805755395683453, | |
| "grad_norm": 0.4055282585841769, | |
| "learning_rate": 1.8123561605283163e-06, | |
| "loss": 0.9132846593856812, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.48345323741007196, | |
| "grad_norm": 0.3366384943701116, | |
| "learning_rate": 1.8095753524938903e-06, | |
| "loss": 1.0845749378204346, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.48633093525179855, | |
| "grad_norm": 0.40679871398886064, | |
| "learning_rate": 1.8067762570037885e-06, | |
| "loss": 1.042318344116211, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.4892086330935252, | |
| "grad_norm": 0.35055813919371737, | |
| "learning_rate": 1.8039589372866347e-06, | |
| "loss": 1.01352858543396, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.4920863309352518, | |
| "grad_norm": 0.441317686613448, | |
| "learning_rate": 1.8011234569827172e-06, | |
| "loss": 1.0617296695709229, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.4949640287769784, | |
| "grad_norm": 0.41119908042871567, | |
| "learning_rate": 1.798269880142554e-06, | |
| "loss": 1.0274578332901, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.497841726618705, | |
| "grad_norm": 0.4140046822969208, | |
| "learning_rate": 1.7953982712254446e-06, | |
| "loss": 1.1444511413574219, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.5007194244604316, | |
| "grad_norm": 0.360331860974586, | |
| "learning_rate": 1.7925086950980134e-06, | |
| "loss": 1.0049320459365845, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.5035971223021583, | |
| "grad_norm": 0.46448969705810783, | |
| "learning_rate": 1.7896012170327466e-06, | |
| "loss": 1.0649842023849487, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.5064748201438849, | |
| "grad_norm": 0.379616869145886, | |
| "learning_rate": 1.7866759027065149e-06, | |
| "loss": 1.0953956842422485, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.5093525179856115, | |
| "grad_norm": 0.46655176135320064, | |
| "learning_rate": 1.783732818199092e-06, | |
| "loss": 1.1652858257293701, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.5122302158273381, | |
| "grad_norm": 0.4331501137557297, | |
| "learning_rate": 1.7807720299916613e-06, | |
| "loss": 1.0503497123718262, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.5151079136690647, | |
| "grad_norm": 0.4528588241142532, | |
| "learning_rate": 1.7777936049653146e-06, | |
| "loss": 0.9811398983001709, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.5179856115107914, | |
| "grad_norm": 0.3875771596875948, | |
| "learning_rate": 1.77479761039954e-06, | |
| "loss": 1.0009725093841553, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.520863309352518, | |
| "grad_norm": 0.379690279869989, | |
| "learning_rate": 1.7717841139707038e-06, | |
| "loss": 1.1046425104141235, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.5237410071942447, | |
| "grad_norm": 0.40872536920209385, | |
| "learning_rate": 1.76875318375052e-06, | |
| "loss": 0.9237216711044312, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.5266187050359712, | |
| "grad_norm": 0.3943687213438678, | |
| "learning_rate": 1.7657048882045149e-06, | |
| "loss": 1.0758323669433594, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.5294964028776978, | |
| "grad_norm": 0.4366095757286378, | |
| "learning_rate": 1.7626392961904783e-06, | |
| "loss": 1.103142261505127, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.5323741007194245, | |
| "grad_norm": 0.4230178437251199, | |
| "learning_rate": 1.7595564769569094e-06, | |
| "loss": 0.9749042987823486, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.5352517985611511, | |
| "grad_norm": 0.3416533764601269, | |
| "learning_rate": 1.7564565001414522e-06, | |
| "loss": 0.8281745910644531, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.5381294964028777, | |
| "grad_norm": 0.33708804570536016, | |
| "learning_rate": 1.753339435769322e-06, | |
| "loss": 1.102489709854126, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.5410071942446043, | |
| "grad_norm": 0.37891052167871625, | |
| "learning_rate": 1.7502053542517244e-06, | |
| "loss": 1.0745601654052734, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.543884892086331, | |
| "grad_norm": 0.3738524538746129, | |
| "learning_rate": 1.7470543263842642e-06, | |
| "loss": 1.111441969871521, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.5467625899280576, | |
| "grad_norm": 0.39050521266738286, | |
| "learning_rate": 1.7438864233453473e-06, | |
| "loss": 1.1269681453704834, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.5496402877697841, | |
| "grad_norm": 0.31482595417583814, | |
| "learning_rate": 1.7407017166945706e-06, | |
| "loss": 1.0488468408584595, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.5525179856115108, | |
| "grad_norm": 0.3136290302777941, | |
| "learning_rate": 1.7375002783711076e-06, | |
| "loss": 0.9358277320861816, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.5553956834532374, | |
| "grad_norm": 0.3338322035032311, | |
| "learning_rate": 1.7342821806920829e-06, | |
| "loss": 1.072392225265503, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.5582733812949641, | |
| "grad_norm": 0.3471468140531117, | |
| "learning_rate": 1.7310474963509378e-06, | |
| "loss": 1.0486462116241455, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.5611510791366906, | |
| "grad_norm": 0.38596584622793473, | |
| "learning_rate": 1.72779629841579e-06, | |
| "loss": 0.9716250896453857, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.5640287769784172, | |
| "grad_norm": 0.3798579435668601, | |
| "learning_rate": 1.7245286603277803e-06, | |
| "loss": 1.033220648765564, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.5669064748201439, | |
| "grad_norm": 0.3425201594360531, | |
| "learning_rate": 1.721244655899416e-06, | |
| "loss": 0.9934518337249756, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.5697841726618705, | |
| "grad_norm": 0.3427994445976512, | |
| "learning_rate": 1.717944359312904e-06, | |
| "loss": 1.134864330291748, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.5726618705035971, | |
| "grad_norm": 0.3723446907907705, | |
| "learning_rate": 1.7146278451184717e-06, | |
| "loss": 1.1344006061553955, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.5755395683453237, | |
| "grad_norm": 0.33571105673864887, | |
| "learning_rate": 1.7112951882326869e-06, | |
| "loss": 0.9915531873703003, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5784172661870504, | |
| "grad_norm": 0.3692434093950694, | |
| "learning_rate": 1.7079464639367632e-06, | |
| "loss": 1.028855800628662, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.581294964028777, | |
| "grad_norm": 0.3094858241361718, | |
| "learning_rate": 1.7045817478748598e-06, | |
| "loss": 0.9810290932655334, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.5841726618705037, | |
| "grad_norm": 0.33222888657473965, | |
| "learning_rate": 1.701201116052374e-06, | |
| "loss": 0.8440494537353516, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.5870503597122302, | |
| "grad_norm": 0.3220131017798883, | |
| "learning_rate": 1.6978046448342226e-06, | |
| "loss": 1.0670182704925537, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.5899280575539568, | |
| "grad_norm": 0.4023809574277352, | |
| "learning_rate": 1.6943924109431179e-06, | |
| "loss": 1.038970708847046, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.5928057553956835, | |
| "grad_norm": 0.36736787076416194, | |
| "learning_rate": 1.690964491457834e-06, | |
| "loss": 1.0510860681533813, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.5956834532374101, | |
| "grad_norm": 0.3488299733915227, | |
| "learning_rate": 1.687520963811467e-06, | |
| "loss": 0.913723886013031, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.5985611510791367, | |
| "grad_norm": 0.48422640633599995, | |
| "learning_rate": 1.684061905789684e-06, | |
| "loss": 0.9846644401550293, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.6014388489208633, | |
| "grad_norm": 0.31689088814964833, | |
| "learning_rate": 1.6805873955289678e-06, | |
| "loss": 1.038316249847412, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.60431654676259, | |
| "grad_norm": 0.31535153436268476, | |
| "learning_rate": 1.6770975115148503e-06, | |
| "loss": 1.1639020442962646, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.6071942446043166, | |
| "grad_norm": 0.32901232219616355, | |
| "learning_rate": 1.6735923325801406e-06, | |
| "loss": 1.0157148838043213, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.6100719424460431, | |
| "grad_norm": 0.32346917708292794, | |
| "learning_rate": 1.670071937903144e-06, | |
| "loss": 0.9528936743736267, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.6129496402877698, | |
| "grad_norm": 0.3431066323853164, | |
| "learning_rate": 1.6665364070058736e-06, | |
| "loss": 1.089216709136963, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.6158273381294964, | |
| "grad_norm": 0.3096527786452577, | |
| "learning_rate": 1.6629858197522535e-06, | |
| "loss": 1.0500307083129883, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.6187050359712231, | |
| "grad_norm": 0.34740584906307037, | |
| "learning_rate": 1.6594202563463149e-06, | |
| "loss": 0.9973140954971313, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.6215827338129496, | |
| "grad_norm": 0.3076575246625187, | |
| "learning_rate": 1.6558397973303851e-06, | |
| "loss": 0.9394571781158447, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.6244604316546762, | |
| "grad_norm": 0.35489785566062343, | |
| "learning_rate": 1.652244523583267e-06, | |
| "loss": 0.9569211006164551, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.6273381294964029, | |
| "grad_norm": 0.33512033241700295, | |
| "learning_rate": 1.6486345163184129e-06, | |
| "loss": 1.0791332721710205, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.6302158273381295, | |
| "grad_norm": 0.3626683432890907, | |
| "learning_rate": 1.6450098570820896e-06, | |
| "loss": 1.0544092655181885, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.6330935251798561, | |
| "grad_norm": 0.358904663222277, | |
| "learning_rate": 1.6413706277515373e-06, | |
| "loss": 0.9803202152252197, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.6359712230215827, | |
| "grad_norm": 0.32815545381559164, | |
| "learning_rate": 1.6377169105331182e-06, | |
| "loss": 0.9604759216308594, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.6388489208633094, | |
| "grad_norm": 0.32597650541963474, | |
| "learning_rate": 1.6340487879604617e-06, | |
| "loss": 1.0064623355865479, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.641726618705036, | |
| "grad_norm": 0.3506857994251924, | |
| "learning_rate": 1.630366342892598e-06, | |
| "loss": 1.014646053314209, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.6446043165467625, | |
| "grad_norm": 0.41874730381325936, | |
| "learning_rate": 1.626669658512088e-06, | |
| "loss": 0.9256491661071777, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.6474820143884892, | |
| "grad_norm": 0.3188217704851316, | |
| "learning_rate": 1.6229588183231434e-06, | |
| "loss": 0.9941632151603699, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.6503597122302158, | |
| "grad_norm": 0.32011807732834047, | |
| "learning_rate": 1.6192339061497413e-06, | |
| "loss": 0.9773931503295898, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.6532374100719425, | |
| "grad_norm": 0.3189594924614036, | |
| "learning_rate": 1.615495006133729e-06, | |
| "loss": 0.9987149238586426, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.6561151079136691, | |
| "grad_norm": 0.35037906857078205, | |
| "learning_rate": 1.6117422027329263e-06, | |
| "loss": 0.9832175374031067, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.6589928057553956, | |
| "grad_norm": 0.38864609779113907, | |
| "learning_rate": 1.6079755807192136e-06, | |
| "loss": 1.0916314125061035, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.6618705035971223, | |
| "grad_norm": 0.30929668859135395, | |
| "learning_rate": 1.604195225176621e-06, | |
| "loss": 0.9629628658294678, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.6647482014388489, | |
| "grad_norm": 0.32671840835956706, | |
| "learning_rate": 1.6004012214994035e-06, | |
| "loss": 0.9343143701553345, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.6676258992805756, | |
| "grad_norm": 0.33641494062099064, | |
| "learning_rate": 1.5965936553901136e-06, | |
| "loss": 1.0556144714355469, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.6705035971223021, | |
| "grad_norm": 0.3187574882066994, | |
| "learning_rate": 1.592772612857665e-06, | |
| "loss": 0.9991135597229004, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.6733812949640288, | |
| "grad_norm": 0.3480834665064568, | |
| "learning_rate": 1.5889381802153896e-06, | |
| "loss": 1.0254430770874023, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.6762589928057554, | |
| "grad_norm": 0.3072999299525753, | |
| "learning_rate": 1.585090444079087e-06, | |
| "loss": 0.985275149345398, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.679136690647482, | |
| "grad_norm": 0.35851120910777423, | |
| "learning_rate": 1.5812294913650694e-06, | |
| "loss": 0.9904893636703491, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.6820143884892086, | |
| "grad_norm": 0.31773614133543254, | |
| "learning_rate": 1.5773554092881984e-06, | |
| "loss": 1.0499398708343506, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.6848920863309352, | |
| "grad_norm": 0.30921377977469555, | |
| "learning_rate": 1.5734682853599122e-06, | |
| "loss": 1.0339066982269287, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.6877697841726619, | |
| "grad_norm": 0.35671236366028325, | |
| "learning_rate": 1.5695682073862525e-06, | |
| "loss": 0.9532429575920105, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.6906474820143885, | |
| "grad_norm": 0.3015523412700019, | |
| "learning_rate": 1.5656552634658776e-06, | |
| "loss": 1.038594365119934, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6935251798561151, | |
| "grad_norm": 0.36455740495219996, | |
| "learning_rate": 1.561729541988076e-06, | |
| "loss": 1.0890312194824219, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.6964028776978417, | |
| "grad_norm": 0.3680930663786755, | |
| "learning_rate": 1.5577911316307658e-06, | |
| "loss": 1.0601049661636353, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.6992805755395683, | |
| "grad_norm": 0.2880128205816018, | |
| "learning_rate": 1.5538401213584948e-06, | |
| "loss": 0.8997229337692261, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.702158273381295, | |
| "grad_norm": 0.32285821272462195, | |
| "learning_rate": 1.549876600420429e-06, | |
| "loss": 0.9955217242240906, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.7050359712230215, | |
| "grad_norm": 0.29115419774124135, | |
| "learning_rate": 1.545900658348338e-06, | |
| "loss": 0.8849923610687256, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.7079136690647482, | |
| "grad_norm": 0.2706455457776965, | |
| "learning_rate": 1.5419123849545708e-06, | |
| "loss": 0.9076135754585266, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.7107913669064748, | |
| "grad_norm": 0.36531081434638296, | |
| "learning_rate": 1.5379118703300282e-06, | |
| "loss": 1.0192983150482178, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.7136690647482015, | |
| "grad_norm": 0.35715016923044796, | |
| "learning_rate": 1.533899204842128e-06, | |
| "loss": 1.034571647644043, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.7165467625899281, | |
| "grad_norm": 0.30344023495404443, | |
| "learning_rate": 1.529874479132763e-06, | |
| "loss": 0.8733739256858826, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.7194244604316546, | |
| "grad_norm": 0.33635279495274495, | |
| "learning_rate": 1.5258377841162533e-06, | |
| "loss": 0.9661943316459656, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.7223021582733813, | |
| "grad_norm": 0.3375538070903443, | |
| "learning_rate": 1.5217892109772935e-06, | |
| "loss": 1.0986987352371216, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.7251798561151079, | |
| "grad_norm": 0.2662157952853344, | |
| "learning_rate": 1.5177288511688927e-06, | |
| "loss": 0.9541377425193787, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.7280575539568346, | |
| "grad_norm": 0.30886908842504907, | |
| "learning_rate": 1.5136567964103076e-06, | |
| "loss": 1.0753300189971924, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.7309352517985611, | |
| "grad_norm": 0.30701154449906404, | |
| "learning_rate": 1.5095731386849723e-06, | |
| "loss": 0.9976100921630859, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.7338129496402878, | |
| "grad_norm": 0.303376410309656, | |
| "learning_rate": 1.5054779702384198e-06, | |
| "loss": 1.0058211088180542, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.7366906474820144, | |
| "grad_norm": 0.3652950100731028, | |
| "learning_rate": 1.5013713835761975e-06, | |
| "loss": 1.0633628368377686, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.739568345323741, | |
| "grad_norm": 0.3390438283446466, | |
| "learning_rate": 1.497253471461779e-06, | |
| "loss": 0.8934162259101868, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.7424460431654676, | |
| "grad_norm": 0.3246861490189164, | |
| "learning_rate": 1.493124326914467e-06, | |
| "loss": 1.0370798110961914, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.7453237410071942, | |
| "grad_norm": 0.30533785722726153, | |
| "learning_rate": 1.4889840432072945e-06, | |
| "loss": 0.9263877868652344, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.7482014388489209, | |
| "grad_norm": 0.31370371579277184, | |
| "learning_rate": 1.484832713864915e-06, | |
| "loss": 0.9624022245407104, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.7510791366906475, | |
| "grad_norm": 0.32008108759680487, | |
| "learning_rate": 1.4806704326614918e-06, | |
| "loss": 0.8735676407814026, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.753956834532374, | |
| "grad_norm": 0.3566203918476789, | |
| "learning_rate": 1.4764972936185795e-06, | |
| "loss": 1.0989207029342651, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.7568345323741007, | |
| "grad_norm": 0.36407543844243995, | |
| "learning_rate": 1.4723133910029996e-06, | |
| "loss": 0.9619901180267334, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.7597122302158273, | |
| "grad_norm": 0.29266238338520917, | |
| "learning_rate": 1.4681188193247115e-06, | |
| "loss": 0.9620180130004883, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.762589928057554, | |
| "grad_norm": 0.32115744502647553, | |
| "learning_rate": 1.4639136733346776e-06, | |
| "loss": 0.9723782539367676, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.7654676258992805, | |
| "grad_norm": 0.32955472439646183, | |
| "learning_rate": 1.4596980480227222e-06, | |
| "loss": 1.01808762550354, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.7683453237410072, | |
| "grad_norm": 0.30150737980380415, | |
| "learning_rate": 1.4554720386153869e-06, | |
| "loss": 1.0717837810516357, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.7712230215827338, | |
| "grad_norm": 0.2886477892998947, | |
| "learning_rate": 1.4512357405737797e-06, | |
| "loss": 0.8863840699195862, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.7741007194244605, | |
| "grad_norm": 0.3310173561528487, | |
| "learning_rate": 1.4469892495914172e-06, | |
| "loss": 0.964940071105957, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.7769784172661871, | |
| "grad_norm": 0.2856238879913019, | |
| "learning_rate": 1.4427326615920641e-06, | |
| "loss": 0.9396013021469116, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.7798561151079136, | |
| "grad_norm": 0.2842593394923139, | |
| "learning_rate": 1.4384660727275662e-06, | |
| "loss": 1.0147062540054321, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.7827338129496403, | |
| "grad_norm": 0.3377858534929305, | |
| "learning_rate": 1.4341895793756781e-06, | |
| "loss": 1.0019702911376953, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.7856115107913669, | |
| "grad_norm": 0.28919748050640776, | |
| "learning_rate": 1.4299032781378863e-06, | |
| "loss": 0.9657357931137085, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.7884892086330936, | |
| "grad_norm": 0.2761094660745925, | |
| "learning_rate": 1.4256072658372278e-06, | |
| "loss": 0.9581419229507446, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.7913669064748201, | |
| "grad_norm": 0.32811507081877733, | |
| "learning_rate": 1.4213016395161016e-06, | |
| "loss": 0.9768601655960083, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.7942446043165468, | |
| "grad_norm": 0.3197698268118257, | |
| "learning_rate": 1.416986496434077e-06, | |
| "loss": 1.0802795886993408, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.7971223021582734, | |
| "grad_norm": 0.32014493538109184, | |
| "learning_rate": 1.412661934065698e-06, | |
| "loss": 1.129173994064331, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.3408260667112233, | |
| "learning_rate": 1.4083280500982796e-06, | |
| "loss": 1.0172650814056396, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.8028776978417266, | |
| "grad_norm": 0.2994608201736648, | |
| "learning_rate": 1.4039849424297022e-06, | |
| "loss": 1.002464771270752, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.8057553956834532, | |
| "grad_norm": 0.27936016058449986, | |
| "learning_rate": 1.3996327091661994e-06, | |
| "loss": 0.9435924887657166, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.8086330935251799, | |
| "grad_norm": 0.3421589464369171, | |
| "learning_rate": 1.3952714486201433e-06, | |
| "loss": 0.9648728370666504, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.8115107913669065, | |
| "grad_norm": 0.3027125759086274, | |
| "learning_rate": 1.3909012593078223e-06, | |
| "loss": 1.0883413553237915, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.814388489208633, | |
| "grad_norm": 0.2718451517981759, | |
| "learning_rate": 1.3865222399472154e-06, | |
| "loss": 0.9606098532676697, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.8172661870503597, | |
| "grad_norm": 0.3439278935498304, | |
| "learning_rate": 1.382134489455765e-06, | |
| "loss": 1.006915807723999, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.8201438848920863, | |
| "grad_norm": 0.25579346143996035, | |
| "learning_rate": 1.3777381069481396e-06, | |
| "loss": 0.9337391257286072, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.823021582733813, | |
| "grad_norm": 0.3050859668016162, | |
| "learning_rate": 1.373333191733995e-06, | |
| "loss": 0.9900962710380554, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.8258992805755395, | |
| "grad_norm": 0.30270443732056235, | |
| "learning_rate": 1.3689198433157332e-06, | |
| "loss": 0.8408849835395813, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.8287769784172662, | |
| "grad_norm": 0.32722776782068325, | |
| "learning_rate": 1.3644981613862523e-06, | |
| "loss": 0.9334912300109863, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.8316546762589928, | |
| "grad_norm": 0.30271696679801074, | |
| "learning_rate": 1.360068245826697e-06, | |
| "loss": 0.9546651840209961, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.8345323741007195, | |
| "grad_norm": 0.30274211349049623, | |
| "learning_rate": 1.3556301967041997e-06, | |
| "loss": 0.9813221096992493, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.837410071942446, | |
| "grad_norm": 0.2635388567144702, | |
| "learning_rate": 1.351184114269622e-06, | |
| "loss": 0.9474866390228271, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.8402877697841726, | |
| "grad_norm": 0.3010633615089385, | |
| "learning_rate": 1.34673009895529e-06, | |
| "loss": 0.986327588558197, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.8431654676258993, | |
| "grad_norm": 0.3250052939342708, | |
| "learning_rate": 1.3422682513727243e-06, | |
| "loss": 0.9753819704055786, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.8460431654676259, | |
| "grad_norm": 0.33588340814315554, | |
| "learning_rate": 1.3377986723103692e-06, | |
| "loss": 0.9891970753669739, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.8489208633093526, | |
| "grad_norm": 0.31646789049784285, | |
| "learning_rate": 1.3333214627313138e-06, | |
| "loss": 0.9514651298522949, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.8517985611510791, | |
| "grad_norm": 0.2813259845708673, | |
| "learning_rate": 1.3288367237710139e-06, | |
| "loss": 0.9831069707870483, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.8546762589928057, | |
| "grad_norm": 0.30604078940680873, | |
| "learning_rate": 1.3243445567350046e-06, | |
| "loss": 0.9211512207984924, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.8575539568345324, | |
| "grad_norm": 0.3167520608936244, | |
| "learning_rate": 1.319845063096615e-06, | |
| "loss": 1.0003859996795654, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.860431654676259, | |
| "grad_norm": 0.31829945664441645, | |
| "learning_rate": 1.3153383444946735e-06, | |
| "loss": 0.8789474964141846, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.8633093525179856, | |
| "grad_norm": 0.31495160151302437, | |
| "learning_rate": 1.3108245027312128e-06, | |
| "loss": 1.0840336084365845, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.8661870503597122, | |
| "grad_norm": 0.30915276693739346, | |
| "learning_rate": 1.3063036397691708e-06, | |
| "loss": 1.0036927461624146, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.8690647482014389, | |
| "grad_norm": 0.2941453011820651, | |
| "learning_rate": 1.3017758577300862e-06, | |
| "loss": 1.0740652084350586, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.8719424460431655, | |
| "grad_norm": 0.29455577634561325, | |
| "learning_rate": 1.297241258891793e-06, | |
| "loss": 0.989548921585083, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.874820143884892, | |
| "grad_norm": 0.3299592819973091, | |
| "learning_rate": 1.2926999456861096e-06, | |
| "loss": 1.0820207595825195, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.8776978417266187, | |
| "grad_norm": 0.2673487326485298, | |
| "learning_rate": 1.2881520206965243e-06, | |
| "loss": 0.9292148351669312, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.8805755395683453, | |
| "grad_norm": 0.28532631258001817, | |
| "learning_rate": 1.2835975866558792e-06, | |
| "loss": 0.9342219233512878, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.883453237410072, | |
| "grad_norm": 0.3025210511532024, | |
| "learning_rate": 1.2790367464440484e-06, | |
| "loss": 0.9670717120170593, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.8863309352517985, | |
| "grad_norm": 0.28896814534982135, | |
| "learning_rate": 1.2744696030856153e-06, | |
| "loss": 0.9335446357727051, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.8892086330935252, | |
| "grad_norm": 0.3084903177297785, | |
| "learning_rate": 1.2698962597475445e-06, | |
| "loss": 0.9629756808280945, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.8920863309352518, | |
| "grad_norm": 0.32422156062771545, | |
| "learning_rate": 1.2653168197368519e-06, | |
| "loss": 0.9787018299102783, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.8949640287769784, | |
| "grad_norm": 0.30159646505494975, | |
| "learning_rate": 1.2607313864982697e-06, | |
| "loss": 0.9642415642738342, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.897841726618705, | |
| "grad_norm": 0.31856979960613646, | |
| "learning_rate": 1.2561400636119124e-06, | |
| "loss": 1.0449435710906982, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.9007194244604316, | |
| "grad_norm": 0.3458241524079836, | |
| "learning_rate": 1.2515429547909346e-06, | |
| "loss": 1.0429253578186035, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.9035971223021583, | |
| "grad_norm": 0.30946600198200386, | |
| "learning_rate": 1.246940163879189e-06, | |
| "loss": 1.0028799772262573, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.9064748201438849, | |
| "grad_norm": 0.31702914654332653, | |
| "learning_rate": 1.2423317948488813e-06, | |
| "loss": 0.9168355464935303, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.9093525179856116, | |
| "grad_norm": 0.2568478715797543, | |
| "learning_rate": 1.23771795179822e-06, | |
| "loss": 0.9950739145278931, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.9122302158273381, | |
| "grad_norm": 0.31321859143517206, | |
| "learning_rate": 1.233098738949067e-06, | |
| "loss": 1.0762598514556885, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.9151079136690647, | |
| "grad_norm": 0.30023123590979206, | |
| "learning_rate": 1.2284742606445817e-06, | |
| "loss": 0.9474934339523315, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.9179856115107914, | |
| "grad_norm": 0.29274105879380363, | |
| "learning_rate": 1.2238446213468653e-06, | |
| "loss": 0.9199013710021973, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.920863309352518, | |
| "grad_norm": 0.3343568620635621, | |
| "learning_rate": 1.2192099256345999e-06, | |
| "loss": 1.0041630268096924, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.9237410071942446, | |
| "grad_norm": 0.3310327147204012, | |
| "learning_rate": 1.2145702782006862e-06, | |
| "loss": 0.8189488649368286, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.9266187050359712, | |
| "grad_norm": 0.2594459346828645, | |
| "learning_rate": 1.2099257838498797e-06, | |
| "loss": 0.8715246915817261, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.9294964028776979, | |
| "grad_norm": 0.34108254418878664, | |
| "learning_rate": 1.205276547496423e-06, | |
| "loss": 0.9883395433425903, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.9323741007194245, | |
| "grad_norm": 0.2900507060656894, | |
| "learning_rate": 1.200622674161675e-06, | |
| "loss": 1.0369722843170166, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.935251798561151, | |
| "grad_norm": 0.2793616872911977, | |
| "learning_rate": 1.195964268971739e-06, | |
| "loss": 0.923148512840271, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.9381294964028777, | |
| "grad_norm": 0.31499649360389437, | |
| "learning_rate": 1.191301437155088e-06, | |
| "loss": 0.9886481165885925, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.9410071942446043, | |
| "grad_norm": 0.3421793579841603, | |
| "learning_rate": 1.186634284040189e-06, | |
| "loss": 1.049983263015747, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.943884892086331, | |
| "grad_norm": 0.2770772261448908, | |
| "learning_rate": 1.1819629150531216e-06, | |
| "loss": 0.9720487594604492, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.9467625899280575, | |
| "grad_norm": 0.31715029343065254, | |
| "learning_rate": 1.1772874357151978e-06, | |
| "loss": 0.9858945608139038, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.9496402877697842, | |
| "grad_norm": 0.27382247211499205, | |
| "learning_rate": 1.1726079516405775e-06, | |
| "loss": 0.8920480012893677, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.9525179856115108, | |
| "grad_norm": 0.3233112136350598, | |
| "learning_rate": 1.1679245685338845e-06, | |
| "loss": 1.059034824371338, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.9553956834532374, | |
| "grad_norm": 0.3014575533302111, | |
| "learning_rate": 1.1632373921878167e-06, | |
| "loss": 0.9916867017745972, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.958273381294964, | |
| "grad_norm": 0.2733813212594252, | |
| "learning_rate": 1.1585465284807575e-06, | |
| "loss": 1.0110840797424316, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.9611510791366906, | |
| "grad_norm": 0.28448057189574405, | |
| "learning_rate": 1.1538520833743843e-06, | |
| "loss": 0.9681780338287354, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.9640287769784173, | |
| "grad_norm": 0.28115226388230347, | |
| "learning_rate": 1.1491541629112744e-06, | |
| "loss": 0.9256088733673096, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.9669064748201439, | |
| "grad_norm": 0.29939601958186174, | |
| "learning_rate": 1.1444528732125096e-06, | |
| "loss": 0.9332914352416992, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.9697841726618706, | |
| "grad_norm": 0.32298682544185786, | |
| "learning_rate": 1.1397483204752789e-06, | |
| "loss": 0.9759551882743835, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.9726618705035971, | |
| "grad_norm": 0.32833751314810994, | |
| "learning_rate": 1.1350406109704804e-06, | |
| "loss": 0.955263614654541, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.9755395683453237, | |
| "grad_norm": 0.3075747126988841, | |
| "learning_rate": 1.1303298510403204e-06, | |
| "loss": 1.0056906938552856, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.9784172661870504, | |
| "grad_norm": 0.3107177190802721, | |
| "learning_rate": 1.1256161470959105e-06, | |
| "loss": 1.0631227493286133, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.981294964028777, | |
| "grad_norm": 0.26705526854232686, | |
| "learning_rate": 1.1208996056148645e-06, | |
| "loss": 0.901911735534668, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.9841726618705036, | |
| "grad_norm": 0.35096026513434014, | |
| "learning_rate": 1.116180333138894e-06, | |
| "loss": 0.9325671195983887, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.9870503597122302, | |
| "grad_norm": 0.3163252628308116, | |
| "learning_rate": 1.1114584362714004e-06, | |
| "loss": 0.9670236706733704, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.9899280575539569, | |
| "grad_norm": 0.3187089894926652, | |
| "learning_rate": 1.1067340216750666e-06, | |
| "loss": 0.9988418221473694, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.9928057553956835, | |
| "grad_norm": 0.31077878018423455, | |
| "learning_rate": 1.1020071960694498e-06, | |
| "loss": 0.9381593465805054, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.99568345323741, | |
| "grad_norm": 0.3019571122091896, | |
| "learning_rate": 1.0972780662285681e-06, | |
| "loss": 0.993405818939209, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.9985611510791367, | |
| "grad_norm": 0.30414175155215467, | |
| "learning_rate": 1.0925467389784904e-06, | |
| "loss": 0.9964547753334045, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.39789673725223623, | |
| "learning_rate": 1.0878133211949227e-06, | |
| "loss": 0.8202004432678223, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.0028776978417266, | |
| "grad_norm": 0.2656098302983397, | |
| "learning_rate": 1.0830779198007942e-06, | |
| "loss": 0.9116101264953613, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.0057553956834533, | |
| "grad_norm": 0.3008389282200421, | |
| "learning_rate": 1.0783406417638417e-06, | |
| "loss": 0.9478936791419983, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.00863309352518, | |
| "grad_norm": 0.35871582917260014, | |
| "learning_rate": 1.0736015940941926e-06, | |
| "loss": 0.8595709800720215, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.0115107913669066, | |
| "grad_norm": 0.31305380574483693, | |
| "learning_rate": 1.0688608838419494e-06, | |
| "loss": 0.8735829591751099, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.014388489208633, | |
| "grad_norm": 0.3548820441522539, | |
| "learning_rate": 1.0641186180947708e-06, | |
| "loss": 0.9741727113723755, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.0172661870503596, | |
| "grad_norm": 0.36321462939223775, | |
| "learning_rate": 1.059374903975451e-06, | |
| "loss": 0.8974572420120239, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.0201438848920863, | |
| "grad_norm": 0.29496183204532933, | |
| "learning_rate": 1.0546298486395032e-06, | |
| "loss": 0.9210361242294312, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.023021582733813, | |
| "grad_norm": 0.2928921022780455, | |
| "learning_rate": 1.0498835592727356e-06, | |
| "loss": 0.9430476427078247, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.0258992805755396, | |
| "grad_norm": 0.3049343344878922, | |
| "learning_rate": 1.0451361430888335e-06, | |
| "loss": 0.861330509185791, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.0287769784172662, | |
| "grad_norm": 0.329234989560513, | |
| "learning_rate": 1.0403877073269346e-06, | |
| "loss": 0.9548070430755615, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.0316546762589929, | |
| "grad_norm": 0.2927660935027829, | |
| "learning_rate": 1.0356383592492083e-06, | |
| "loss": 0.9394206404685974, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.0345323741007195, | |
| "grad_norm": 0.28029486425646316, | |
| "learning_rate": 1.0308882061384322e-06, | |
| "loss": 0.940388560295105, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.037410071942446, | |
| "grad_norm": 0.3277189443862227, | |
| "learning_rate": 1.0261373552955689e-06, | |
| "loss": 1.0485488176345825, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.0402877697841726, | |
| "grad_norm": 0.3336979554860064, | |
| "learning_rate": 1.021385914037341e-06, | |
| "loss": 1.006148338317871, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.0431654676258992, | |
| "grad_norm": 0.3034246503039526, | |
| "learning_rate": 1.0166339896938096e-06, | |
| "loss": 1.040244460105896, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.0460431654676259, | |
| "grad_norm": 0.2909854917188287, | |
| "learning_rate": 1.0118816896059472e-06, | |
| "loss": 1.0620298385620117, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.0489208633093525, | |
| "grad_norm": 0.3342829322918414, | |
| "learning_rate": 1.0071291211232142e-06, | |
| "loss": 1.0369703769683838, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.0517985611510792, | |
| "grad_norm": 0.3620459894772739, | |
| "learning_rate": 1.0023763916011337e-06, | |
| "loss": 1.005780816078186, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.0546762589928058, | |
| "grad_norm": 0.3245799271587319, | |
| "learning_rate": 9.976236083988662e-07, | |
| "loss": 1.0978028774261475, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.0575539568345325, | |
| "grad_norm": 0.2906620053944161, | |
| "learning_rate": 9.928708788767857e-07, | |
| "loss": 1.0192337036132812, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.0604316546762589, | |
| "grad_norm": 0.2788940484645077, | |
| "learning_rate": 9.881183103940525e-07, | |
| "loss": 1.0336426496505737, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.0633093525179855, | |
| "grad_norm": 0.2646864305544589, | |
| "learning_rate": 9.833660103061903e-07, | |
| "loss": 0.9359861612319946, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.0661870503597122, | |
| "grad_norm": 0.3024535696584821, | |
| "learning_rate": 9.78614085962659e-07, | |
| "loss": 0.9596098065376282, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.0690647482014388, | |
| "grad_norm": 0.3433854130018685, | |
| "learning_rate": 9.738626447044315e-07, | |
| "loss": 0.9648246169090271, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.0719424460431655, | |
| "grad_norm": 0.29727053840895096, | |
| "learning_rate": 9.691117938615677e-07, | |
| "loss": 0.937362551689148, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.074820143884892, | |
| "grad_norm": 0.3148473300114735, | |
| "learning_rate": 9.643616407507916e-07, | |
| "loss": 1.0278003215789795, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.0776978417266188, | |
| "grad_norm": 0.34189586023197116, | |
| "learning_rate": 9.596122926730653e-07, | |
| "loss": 1.053139090538025, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.0805755395683454, | |
| "grad_norm": 0.2979805327926045, | |
| "learning_rate": 9.548638569111664e-07, | |
| "loss": 0.9690728187561035, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.083453237410072, | |
| "grad_norm": 0.32530063715847013, | |
| "learning_rate": 9.501164407272641e-07, | |
| "loss": 0.9638134837150574, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.0863309352517985, | |
| "grad_norm": 0.28340436102152416, | |
| "learning_rate": 9.453701513604971e-07, | |
| "loss": 1.0154237747192383, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.0892086330935251, | |
| "grad_norm": 0.3344518694998693, | |
| "learning_rate": 9.406250960245492e-07, | |
| "loss": 1.0023622512817383, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.0920863309352518, | |
| "grad_norm": 0.3057560258941549, | |
| "learning_rate": 9.358813819052293e-07, | |
| "loss": 0.943859338760376, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.0949640287769784, | |
| "grad_norm": 0.31581539413489, | |
| "learning_rate": 9.311391161580505e-07, | |
| "loss": 0.9440896511077881, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.097841726618705, | |
| "grad_norm": 0.28253845985432785, | |
| "learning_rate": 9.263984059058073e-07, | |
| "loss": 0.9603610634803772, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.1007194244604317, | |
| "grad_norm": 0.3743060929428334, | |
| "learning_rate": 9.216593582361584e-07, | |
| "loss": 0.9380893707275391, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.1035971223021583, | |
| "grad_norm": 0.29782880129060824, | |
| "learning_rate": 9.169220801992054e-07, | |
| "loss": 0.9593515396118164, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.106474820143885, | |
| "grad_norm": 0.25830569907542367, | |
| "learning_rate": 9.121866788050772e-07, | |
| "loss": 0.9210997819900513, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.1093525179856114, | |
| "grad_norm": 0.29300460156324154, | |
| "learning_rate": 9.074532610215097e-07, | |
| "loss": 0.9233313798904419, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.112230215827338, | |
| "grad_norm": 0.2922799068070249, | |
| "learning_rate": 9.027219337714323e-07, | |
| "loss": 0.9572200179100037, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.1151079136690647, | |
| "grad_norm": 0.294905286599111, | |
| "learning_rate": 8.979928039305502e-07, | |
| "loss": 1.0306824445724487, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.1179856115107913, | |
| "grad_norm": 0.3096443969458927, | |
| "learning_rate": 8.932659783249332e-07, | |
| "loss": 0.9011950492858887, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.120863309352518, | |
| "grad_norm": 0.269705652072412, | |
| "learning_rate": 8.885415637285997e-07, | |
| "loss": 0.9103861451148987, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.1237410071942446, | |
| "grad_norm": 0.3045551669252975, | |
| "learning_rate": 8.838196668611056e-07, | |
| "loss": 0.9500089883804321, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.1266187050359713, | |
| "grad_norm": 0.3584458914570723, | |
| "learning_rate": 8.791003943851352e-07, | |
| "loss": 1.0625544786453247, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.129496402877698, | |
| "grad_norm": 0.40204111666436204, | |
| "learning_rate": 8.743838529040896e-07, | |
| "loss": 1.0451273918151855, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.1323741007194243, | |
| "grad_norm": 0.2801446258828758, | |
| "learning_rate": 8.696701489596796e-07, | |
| "loss": 0.8780025839805603, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.135251798561151, | |
| "grad_norm": 0.30848025340500973, | |
| "learning_rate": 8.649593890295195e-07, | |
| "loss": 0.853165328502655, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.1381294964028776, | |
| "grad_norm": 0.2774601092911939, | |
| "learning_rate": 8.602516795247212e-07, | |
| "loss": 0.997830331325531, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.1410071942446043, | |
| "grad_norm": 0.3339281391141689, | |
| "learning_rate": 8.555471267874904e-07, | |
| "loss": 1.0442490577697754, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.143884892086331, | |
| "grad_norm": 0.2740872772386324, | |
| "learning_rate": 8.508458370887254e-07, | |
| "loss": 0.9518193602561951, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.1467625899280576, | |
| "grad_norm": 0.2720117171082711, | |
| "learning_rate": 8.461479166256155e-07, | |
| "loss": 0.8949469327926636, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.1496402877697842, | |
| "grad_norm": 0.296704642333982, | |
| "learning_rate": 8.414534715192424e-07, | |
| "loss": 1.002563714981079, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.1525179856115109, | |
| "grad_norm": 0.3435410162561758, | |
| "learning_rate": 8.367626078121836e-07, | |
| "loss": 0.972290575504303, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.1553956834532375, | |
| "grad_norm": 0.3129884619450547, | |
| "learning_rate": 8.320754314661158e-07, | |
| "loss": 1.01462984085083, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.158273381294964, | |
| "grad_norm": 0.30417813847501757, | |
| "learning_rate": 8.273920483594224e-07, | |
| "loss": 0.9698868989944458, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.1611510791366906, | |
| "grad_norm": 0.3020411325830846, | |
| "learning_rate": 8.227125642848023e-07, | |
| "loss": 0.9128695726394653, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.1640287769784172, | |
| "grad_norm": 0.346879261409609, | |
| "learning_rate": 8.180370849468783e-07, | |
| "loss": 0.9776325821876526, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.1669064748201439, | |
| "grad_norm": 0.36045310471755976, | |
| "learning_rate": 8.133657159598107e-07, | |
| "loss": 1.0711374282836914, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.1697841726618705, | |
| "grad_norm": 0.33405600358385434, | |
| "learning_rate": 8.086985628449118e-07, | |
| "loss": 1.01808500289917, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.1726618705035972, | |
| "grad_norm": 0.33784058532809186, | |
| "learning_rate": 8.040357310282614e-07, | |
| "loss": 1.073177695274353, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.1755395683453238, | |
| "grad_norm": 0.31432792819184735, | |
| "learning_rate": 7.993773258383251e-07, | |
| "loss": 0.9766973257064819, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.1784172661870504, | |
| "grad_norm": 0.29143664503321964, | |
| "learning_rate": 7.94723452503577e-07, | |
| "loss": 0.8510106801986694, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.181294964028777, | |
| "grad_norm": 0.3117754281019984, | |
| "learning_rate": 7.900742161501203e-07, | |
| "loss": 0.9605945348739624, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.1841726618705035, | |
| "grad_norm": 0.3083610748282882, | |
| "learning_rate": 7.854297217993138e-07, | |
| "loss": 1.0148074626922607, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.1870503597122302, | |
| "grad_norm": 0.3292067324035066, | |
| "learning_rate": 7.807900743654003e-07, | |
| "loss": 1.0121517181396484, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.1899280575539568, | |
| "grad_norm": 0.2906819013001279, | |
| "learning_rate": 7.761553786531344e-07, | |
| "loss": 0.9553067684173584, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.1928057553956835, | |
| "grad_norm": 0.25709992178123586, | |
| "learning_rate": 7.71525739355418e-07, | |
| "loss": 0.8994815945625305, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.19568345323741, | |
| "grad_norm": 0.36452886143608954, | |
| "learning_rate": 7.669012610509332e-07, | |
| "loss": 0.953561544418335, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.1985611510791367, | |
| "grad_norm": 0.3247835868196829, | |
| "learning_rate": 7.622820482017803e-07, | |
| "loss": 0.9593473076820374, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.2014388489208634, | |
| "grad_norm": 0.2805164760032836, | |
| "learning_rate": 7.57668205151119e-07, | |
| "loss": 0.9459452629089355, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.2043165467625898, | |
| "grad_norm": 0.28523487135593184, | |
| "learning_rate": 7.53059836120811e-07, | |
| "loss": 0.9797439575195312, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.2071942446043165, | |
| "grad_norm": 0.3003875524590878, | |
| "learning_rate": 7.484570452090654e-07, | |
| "loss": 0.9212760925292969, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.210071942446043, | |
| "grad_norm": 0.34132960956027913, | |
| "learning_rate": 7.438599363880873e-07, | |
| "loss": 1.0429980754852295, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.2129496402877697, | |
| "grad_norm": 0.30371204747015557, | |
| "learning_rate": 7.3926861350173e-07, | |
| "loss": 0.9603173732757568, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.2158273381294964, | |
| "grad_norm": 0.3162676331919284, | |
| "learning_rate": 7.346831802631485e-07, | |
| "loss": 1.010259985923767, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.218705035971223, | |
| "grad_norm": 0.335784603934896, | |
| "learning_rate": 7.301037402524554e-07, | |
| "loss": 0.9941245913505554, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.2215827338129497, | |
| "grad_norm": 0.34689945542903367, | |
| "learning_rate": 7.255303969143847e-07, | |
| "loss": 0.9076559543609619, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.2244604316546763, | |
| "grad_norm": 0.30699564205037894, | |
| "learning_rate": 7.209632535559517e-07, | |
| "loss": 0.9393267035484314, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.227338129496403, | |
| "grad_norm": 0.3645881875578552, | |
| "learning_rate": 7.164024133441209e-07, | |
| "loss": 1.0797785520553589, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.2302158273381294, | |
| "grad_norm": 0.30842152383790683, | |
| "learning_rate": 7.118479793034757e-07, | |
| "loss": 0.8502181172370911, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.233093525179856, | |
| "grad_norm": 0.2837918860269475, | |
| "learning_rate": 7.073000543138903e-07, | |
| "loss": 0.8781344294548035, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.2359712230215827, | |
| "grad_norm": 0.30258666124852246, | |
| "learning_rate": 7.027587411082068e-07, | |
| "loss": 0.9787595272064209, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.2388489208633093, | |
| "grad_norm": 0.32255457306195484, | |
| "learning_rate": 6.98224142269914e-07, | |
| "loss": 1.0447101593017578, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.241726618705036, | |
| "grad_norm": 0.2842145651170118, | |
| "learning_rate": 6.936963602308296e-07, | |
| "loss": 0.8477309942245483, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.2446043165467626, | |
| "grad_norm": 0.2857078174523759, | |
| "learning_rate": 6.891754972687872e-07, | |
| "loss": 0.973019003868103, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.2474820143884893, | |
| "grad_norm": 0.26675519419868937, | |
| "learning_rate": 6.846616555053265e-07, | |
| "loss": 0.8788484930992126, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.2503597122302157, | |
| "grad_norm": 0.2722315460093143, | |
| "learning_rate": 6.80154936903385e-07, | |
| "loss": 0.8963809013366699, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.2532374100719426, | |
| "grad_norm": 0.321353554344839, | |
| "learning_rate": 6.756554432649952e-07, | |
| "loss": 0.9304237365722656, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.256115107913669, | |
| "grad_norm": 0.8802677887155806, | |
| "learning_rate": 6.711632762289863e-07, | |
| "loss": 0.9569498300552368, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.2589928057553956, | |
| "grad_norm": 0.3027788818991336, | |
| "learning_rate": 6.666785372686862e-07, | |
| "loss": 0.9950339198112488, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.2618705035971223, | |
| "grad_norm": 0.3278298564206992, | |
| "learning_rate": 6.622013276896309e-07, | |
| "loss": 1.0428767204284668, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.264748201438849, | |
| "grad_norm": 0.322273469382939, | |
| "learning_rate": 6.577317486272756e-07, | |
| "loss": 1.0519962310791016, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.2676258992805756, | |
| "grad_norm": 0.27000899918490673, | |
| "learning_rate": 6.5326990104471e-07, | |
| "loss": 0.9430403709411621, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.2705035971223022, | |
| "grad_norm": 0.2733597667465118, | |
| "learning_rate": 6.488158857303778e-07, | |
| "loss": 0.8923604488372803, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.2733812949640289, | |
| "grad_norm": 0.3379057723615061, | |
| "learning_rate": 6.443698032958003e-07, | |
| "loss": 0.8423130512237549, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.2762589928057553, | |
| "grad_norm": 0.298352488485131, | |
| "learning_rate": 6.399317541733029e-07, | |
| "loss": 0.8984063863754272, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.2791366906474821, | |
| "grad_norm": 0.3079552906979132, | |
| "learning_rate": 6.355018386137474e-07, | |
| "loss": 1.0057708024978638, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.2820143884892086, | |
| "grad_norm": 0.3494450738277216, | |
| "learning_rate": 6.310801566842671e-07, | |
| "loss": 1.0255926847457886, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.2848920863309352, | |
| "grad_norm": 0.32073469498291907, | |
| "learning_rate": 6.266668082660051e-07, | |
| "loss": 0.9159607291221619, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.2877697841726619, | |
| "grad_norm": 0.30277181047008334, | |
| "learning_rate": 6.222618930518604e-07, | |
| "loss": 0.9396940469741821, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.2906474820143885, | |
| "grad_norm": 0.2996729716461448, | |
| "learning_rate": 6.178655105442347e-07, | |
| "loss": 0.9432433247566223, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.2935251798561151, | |
| "grad_norm": 0.31105342806959047, | |
| "learning_rate": 6.134777600527845e-07, | |
| "loss": 0.963239848613739, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.2964028776978418, | |
| "grad_norm": 0.30886949969557964, | |
| "learning_rate": 6.09098740692178e-07, | |
| "loss": 0.8668818473815918, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.2992805755395684, | |
| "grad_norm": 0.34250157527572056, | |
| "learning_rate": 6.047285513798568e-07, | |
| "loss": 0.9877142310142517, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.3021582733812949, | |
| "grad_norm": 0.3321433510552703, | |
| "learning_rate": 6.003672908338008e-07, | |
| "loss": 1.1121788024902344, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.3050359712230215, | |
| "grad_norm": 0.34432256224945607, | |
| "learning_rate": 5.96015057570298e-07, | |
| "loss": 0.9205185770988464, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.3079136690647482, | |
| "grad_norm": 0.3129154481448231, | |
| "learning_rate": 5.916719499017206e-07, | |
| "loss": 0.9529520869255066, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.3107913669064748, | |
| "grad_norm": 0.3072358733262211, | |
| "learning_rate": 5.873380659343021e-07, | |
| "loss": 0.8947219252586365, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.3136690647482014, | |
| "grad_norm": 0.2689806016977398, | |
| "learning_rate": 5.83013503565923e-07, | |
| "loss": 0.8450409173965454, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.316546762589928, | |
| "grad_norm": 0.30257611336308615, | |
| "learning_rate": 5.786983604838983e-07, | |
| "loss": 0.9042650461196899, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.3194244604316547, | |
| "grad_norm": 0.30536933730041105, | |
| "learning_rate": 5.743927341627722e-07, | |
| "loss": 0.9189790487289429, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.3223021582733812, | |
| "grad_norm": 0.3116994635317501, | |
| "learning_rate": 5.700967218621133e-07, | |
| "loss": 0.9711490869522095, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.325179856115108, | |
| "grad_norm": 0.29567216593755763, | |
| "learning_rate": 5.658104206243221e-07, | |
| "loss": 0.9121512174606323, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.3280575539568344, | |
| "grad_norm": 0.33097880600820834, | |
| "learning_rate": 5.615339272724337e-07, | |
| "loss": 1.0232415199279785, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.330935251798561, | |
| "grad_norm": 0.30421121109984933, | |
| "learning_rate": 5.572673384079361e-07, | |
| "loss": 0.9682353734970093, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.3338129496402877, | |
| "grad_norm": 0.2989421078796733, | |
| "learning_rate": 5.530107504085829e-07, | |
| "loss": 1.0788567066192627, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.3366906474820144, | |
| "grad_norm": 0.3368608300644779, | |
| "learning_rate": 5.487642594262203e-07, | |
| "loss": 1.0391610860824585, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.339568345323741, | |
| "grad_norm": 0.31237915616425, | |
| "learning_rate": 5.445279613846132e-07, | |
| "loss": 0.97783362865448, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.3424460431654677, | |
| "grad_norm": 0.2917517042157253, | |
| "learning_rate": 5.40301951977278e-07, | |
| "loss": 0.9356849193572998, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.3453237410071943, | |
| "grad_norm": 0.2906559333604664, | |
| "learning_rate": 5.360863266653227e-07, | |
| "loss": 0.9209206104278564, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.3482014388489207, | |
| "grad_norm": 0.2762311855557999, | |
| "learning_rate": 5.318811806752883e-07, | |
| "loss": 0.8892006278038025, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.3510791366906476, | |
| "grad_norm": 0.2843773069845965, | |
| "learning_rate": 5.276866089970004e-07, | |
| "loss": 0.8870881795883179, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.353956834532374, | |
| "grad_norm": 0.3214966447300032, | |
| "learning_rate": 5.235027063814204e-07, | |
| "loss": 1.05729341506958, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.3568345323741007, | |
| "grad_norm": 0.29929162633018896, | |
| "learning_rate": 5.193295673385081e-07, | |
| "loss": 0.9966158866882324, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.3597122302158273, | |
| "grad_norm": 0.2845609225335763, | |
| "learning_rate": 5.151672861350849e-07, | |
| "loss": 0.983919084072113, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.362589928057554, | |
| "grad_norm": 0.2872905985027109, | |
| "learning_rate": 5.110159567927056e-07, | |
| "loss": 0.9776226282119751, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.3654676258992806, | |
| "grad_norm": 0.3004471218975015, | |
| "learning_rate": 5.068756730855328e-07, | |
| "loss": 0.8701659440994263, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.3683453237410073, | |
| "grad_norm": 0.27295033239262645, | |
| "learning_rate": 5.027465285382213e-07, | |
| "loss": 0.8881811499595642, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.371223021582734, | |
| "grad_norm": 0.2943944847225173, | |
| "learning_rate": 4.986286164238025e-07, | |
| "loss": 0.8865438103675842, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.3741007194244603, | |
| "grad_norm": 0.2867589741491455, | |
| "learning_rate": 4.945220297615805e-07, | |
| "loss": 0.9757734537124634, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.376978417266187, | |
| "grad_norm": 0.30445366971206, | |
| "learning_rate": 4.904268613150278e-07, | |
| "loss": 0.9451441764831543, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.3798561151079136, | |
| "grad_norm": 0.3764482983471657, | |
| "learning_rate": 4.863432035896924e-07, | |
| "loss": 1.0263563394546509, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.3827338129496403, | |
| "grad_norm": 0.28674275025350787, | |
| "learning_rate": 4.822711488311076e-07, | |
| "loss": 0.9233589768409729, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.385611510791367, | |
| "grad_norm": 0.28357755576379123, | |
| "learning_rate": 4.782107890227065e-07, | |
| "loss": 0.8945414423942566, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.3884892086330936, | |
| "grad_norm": 0.3296678488028909, | |
| "learning_rate": 4.7416221588374695e-07, | |
| "loss": 0.92512047290802, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.3913669064748202, | |
| "grad_norm": 0.34335013571463424, | |
| "learning_rate": 4.701255208672371e-07, | |
| "loss": 0.8945969343185425, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.3942446043165468, | |
| "grad_norm": 0.2681643725763488, | |
| "learning_rate": 4.6610079515787217e-07, | |
| "loss": 0.8868216872215271, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.3971223021582735, | |
| "grad_norm": 0.29646359252343524, | |
| "learning_rate": 4.620881296699718e-07, | |
| "loss": 0.8830418586730957, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.34972600472566895, | |
| "learning_rate": 4.5808761504542915e-07, | |
| "loss": 1.0035524368286133, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.4028776978417266, | |
| "grad_norm": 0.3131507468407678, | |
| "learning_rate": 4.5409934165166174e-07, | |
| "loss": 0.9483344554901123, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.4057553956834532, | |
| "grad_norm": 0.3077131853121991, | |
| "learning_rate": 4.501233995795708e-07, | |
| "loss": 1.042191982269287, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.4086330935251798, | |
| "grad_norm": 0.30297924853409636, | |
| "learning_rate": 4.4615987864150517e-07, | |
| "loss": 0.9574159383773804, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.4115107913669065, | |
| "grad_norm": 0.3101670489465847, | |
| "learning_rate": 4.4220886836923443e-07, | |
| "loss": 0.9550837874412537, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.4143884892086331, | |
| "grad_norm": 0.3028518200998954, | |
| "learning_rate": 4.382704580119242e-07, | |
| "loss": 0.9465584754943848, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.4172661870503598, | |
| "grad_norm": 0.3563033540312973, | |
| "learning_rate": 4.343447365341225e-07, | |
| "loss": 1.0004384517669678, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.4201438848920862, | |
| "grad_norm": 0.33396685382103786, | |
| "learning_rate": 4.3043179261374775e-07, | |
| "loss": 0.9882891774177551, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.423021582733813, | |
| "grad_norm": 0.30772910816683374, | |
| "learning_rate": 4.265317146400876e-07, | |
| "loss": 0.9874916672706604, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.4258992805755395, | |
| "grad_norm": 0.30524559446834776, | |
| "learning_rate": 4.226445907118018e-07, | |
| "loss": 0.9210883378982544, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.4287769784172661, | |
| "grad_norm": 0.3267114002295349, | |
| "learning_rate": 4.1877050863493037e-07, | |
| "loss": 0.9688763618469238, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.4316546762589928, | |
| "grad_norm": 0.27339720775223914, | |
| "learning_rate": 4.1490955592091325e-07, | |
| "loss": 0.8747698068618774, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.4345323741007194, | |
| "grad_norm": 0.28341881156979953, | |
| "learning_rate": 4.110618197846105e-07, | |
| "loss": 0.9002431035041809, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.437410071942446, | |
| "grad_norm": 0.3157537655118046, | |
| "learning_rate": 4.0722738714233475e-07, | |
| "loss": 0.9333710074424744, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.4402877697841727, | |
| "grad_norm": 0.3385136686246348, | |
| "learning_rate": 4.0340634460988634e-07, | |
| "loss": 0.9397541284561157, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.4431654676258994, | |
| "grad_norm": 0.2799609416561718, | |
| "learning_rate": 3.9959877850059654e-07, | |
| "loss": 0.9181256890296936, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.4460431654676258, | |
| "grad_norm": 0.32743527244615317, | |
| "learning_rate": 3.958047748233789e-07, | |
| "loss": 0.9613093733787537, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.4489208633093524, | |
| "grad_norm": 0.31262261972142885, | |
| "learning_rate": 3.920244192807864e-07, | |
| "loss": 1.006971836090088, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.451798561151079, | |
| "grad_norm": 0.2756374429613347, | |
| "learning_rate": 3.8825779726707363e-07, | |
| "loss": 0.9426612854003906, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.4546762589928057, | |
| "grad_norm": 0.31900967384513096, | |
| "learning_rate": 3.845049938662709e-07, | |
| "loss": 1.0267070531845093, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.4575539568345324, | |
| "grad_norm": 0.2806375956545195, | |
| "learning_rate": 3.807660938502588e-07, | |
| "loss": 0.8537903428077698, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.460431654676259, | |
| "grad_norm": 0.2679665420161734, | |
| "learning_rate": 3.770411816768567e-07, | |
| "loss": 0.8869454860687256, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.4633093525179857, | |
| "grad_norm": 0.2940220938489727, | |
| "learning_rate": 3.733303414879121e-07, | |
| "loss": 0.9901649951934814, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.4661870503597123, | |
| "grad_norm": 0.3002023454804642, | |
| "learning_rate": 3.696336571074019e-07, | |
| "loss": 0.893314003944397, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.469064748201439, | |
| "grad_norm": 0.2989449706350416, | |
| "learning_rate": 3.659512120395384e-07, | |
| "loss": 0.9651301503181458, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.4719424460431654, | |
| "grad_norm": 0.31184751443962444, | |
| "learning_rate": 3.6228308946688156e-07, | |
| "loss": 0.9276424646377563, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.474820143884892, | |
| "grad_norm": 0.3280970794871676, | |
| "learning_rate": 3.586293722484628e-07, | |
| "loss": 0.9295877814292908, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.4776978417266187, | |
| "grad_norm": 0.30361568371256686, | |
| "learning_rate": 3.549901429179103e-07, | |
| "loss": 0.9780776500701904, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.4805755395683453, | |
| "grad_norm": 0.31276714037726877, | |
| "learning_rate": 3.513654836815871e-07, | |
| "loss": 0.9611223936080933, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.483453237410072, | |
| "grad_norm": 0.29813652177832717, | |
| "learning_rate": 3.477554764167333e-07, | |
| "loss": 0.9399750232696533, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.4863309352517986, | |
| "grad_norm": 0.2879175131540814, | |
| "learning_rate": 3.44160202669615e-07, | |
| "loss": 0.9704416990280151, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.4892086330935252, | |
| "grad_norm": 0.27203404497254424, | |
| "learning_rate": 3.4057974365368494e-07, | |
| "loss": 0.9447322487831116, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.4920863309352517, | |
| "grad_norm": 0.3345564889146381, | |
| "learning_rate": 3.3701418024774654e-07, | |
| "loss": 0.9653472900390625, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.4949640287769785, | |
| "grad_norm": 0.3592025624857194, | |
| "learning_rate": 3.334635929941262e-07, | |
| "loss": 1.0416182279586792, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.497841726618705, | |
| "grad_norm": 0.3021027064210215, | |
| "learning_rate": 3.29928062096856e-07, | |
| "loss": 0.8763036727905273, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.5007194244604316, | |
| "grad_norm": 0.3300914239445515, | |
| "learning_rate": 3.264076674198594e-07, | |
| "loss": 0.9283760786056519, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.5035971223021583, | |
| "grad_norm": 0.30648914339336775, | |
| "learning_rate": 3.229024884851499e-07, | |
| "loss": 1.0218451023101807, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.506474820143885, | |
| "grad_norm": 0.3028904431521258, | |
| "learning_rate": 3.1941260447103226e-07, | |
| "loss": 0.9676252603530884, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.5093525179856115, | |
| "grad_norm": 0.31498989874096944, | |
| "learning_rate": 3.159380942103158e-07, | |
| "loss": 1.0615897178649902, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.512230215827338, | |
| "grad_norm": 0.3812643080356087, | |
| "learning_rate": 3.1247903618853323e-07, | |
| "loss": 1.0494942665100098, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.5151079136690648, | |
| "grad_norm": 0.3015637470031748, | |
| "learning_rate": 3.0903550854216597e-07, | |
| "loss": 0.9968015551567078, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.5179856115107913, | |
| "grad_norm": 0.31980913910153197, | |
| "learning_rate": 3.0560758905688243e-07, | |
| "loss": 0.8924911022186279, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.5208633093525181, | |
| "grad_norm": 0.3219645985844337, | |
| "learning_rate": 3.021953551657773e-07, | |
| "loss": 0.9802518486976624, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.5237410071942445, | |
| "grad_norm": 0.32235606474899925, | |
| "learning_rate": 2.9879888394762576e-07, | |
| "loss": 1.0681138038635254, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.5266187050359712, | |
| "grad_norm": 0.29753869212587086, | |
| "learning_rate": 2.9541825212514006e-07, | |
| "loss": 1.0018823146820068, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.5294964028776978, | |
| "grad_norm": 0.32215332495706295, | |
| "learning_rate": 2.920535360632368e-07, | |
| "loss": 1.0154387950897217, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.5323741007194245, | |
| "grad_norm": 0.33197651591712296, | |
| "learning_rate": 2.8870481176731287e-07, | |
| "loss": 1.0280838012695312, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.5352517985611511, | |
| "grad_norm": 0.31343044639526885, | |
| "learning_rate": 2.853721548815283e-07, | |
| "loss": 1.0017954111099243, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.5381294964028775, | |
| "grad_norm": 0.3073520760945164, | |
| "learning_rate": 2.8205564068709596e-07, | |
| "loss": 0.9721263647079468, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.5410071942446044, | |
| "grad_norm": 0.27794918211270764, | |
| "learning_rate": 2.787553441005839e-07, | |
| "loss": 0.8929443955421448, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.5438848920863308, | |
| "grad_norm": 0.36462602887473333, | |
| "learning_rate": 2.754713396722198e-07, | |
| "loss": 0.8837905526161194, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.5467625899280577, | |
| "grad_norm": 0.2590201800878289, | |
| "learning_rate": 2.7220370158421026e-07, | |
| "loss": 0.9194425344467163, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.5496402877697841, | |
| "grad_norm": 0.29622142248800076, | |
| "learning_rate": 2.68952503649062e-07, | |
| "loss": 0.9988787174224854, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.5525179856115108, | |
| "grad_norm": 0.3126140379352631, | |
| "learning_rate": 2.65717819307917e-07, | |
| "loss": 0.9965845942497253, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.5553956834532374, | |
| "grad_norm": 0.3415760999590957, | |
| "learning_rate": 2.6249972162889244e-07, | |
| "loss": 1.0078110694885254, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.558273381294964, | |
| "grad_norm": 0.3369977516727391, | |
| "learning_rate": 2.5929828330542935e-07, | |
| "loss": 1.0197123289108276, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.5611510791366907, | |
| "grad_norm": 0.32678249950063587, | |
| "learning_rate": 2.561135766546525e-07, | |
| "loss": 0.9625093936920166, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.5640287769784171, | |
| "grad_norm": 0.3014751724857403, | |
| "learning_rate": 2.529456736157356e-07, | |
| "loss": 0.8934499621391296, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.566906474820144, | |
| "grad_norm": 0.27876968496773213, | |
| "learning_rate": 2.4979464574827555e-07, | |
| "loss": 0.8468393087387085, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.5697841726618704, | |
| "grad_norm": 0.2757645778822954, | |
| "learning_rate": 2.4666056423067825e-07, | |
| "loss": 0.9273233413696289, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.572661870503597, | |
| "grad_norm": 0.313799523824532, | |
| "learning_rate": 2.4354349985854795e-07, | |
| "loss": 0.9406145811080933, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.5755395683453237, | |
| "grad_norm": 0.3175009917256148, | |
| "learning_rate": 2.4044352304309044e-07, | |
| "loss": 0.9628616571426392, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.5784172661870504, | |
| "grad_norm": 0.27743168737673624, | |
| "learning_rate": 2.3736070380952165e-07, | |
| "loss": 0.9222140312194824, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.581294964028777, | |
| "grad_norm": 0.2851178701079908, | |
| "learning_rate": 2.34295111795485e-07, | |
| "loss": 1.0274484157562256, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.5841726618705037, | |
| "grad_norm": 0.38515099715665085, | |
| "learning_rate": 2.3124681624948006e-07, | |
| "loss": 1.0696377754211426, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.5870503597122303, | |
| "grad_norm": 0.3133249799626146, | |
| "learning_rate": 2.2821588602929632e-07, | |
| "loss": 0.9214944839477539, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.5899280575539567, | |
| "grad_norm": 0.3307355043232978, | |
| "learning_rate": 2.252023896004601e-07, | |
| "loss": 0.8194340467453003, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.5928057553956836, | |
| "grad_norm": 0.2899499890420293, | |
| "learning_rate": 2.2220639503468542e-07, | |
| "loss": 0.9256591796875, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.59568345323741, | |
| "grad_norm": 0.3059563087954579, | |
| "learning_rate": 2.192279700083385e-07, | |
| "loss": 0.9171819686889648, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.5985611510791367, | |
| "grad_norm": 0.2910581911029511, | |
| "learning_rate": 2.162671818009082e-07, | |
| "loss": 0.8385239839553833, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.6014388489208633, | |
| "grad_norm": 0.3077283177341958, | |
| "learning_rate": 2.133240972934852e-07, | |
| "loss": 0.9529489278793335, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.60431654676259, | |
| "grad_norm": 0.33438792027940295, | |
| "learning_rate": 2.1039878296725332e-07, | |
| "loss": 0.9289690256118774, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.6071942446043166, | |
| "grad_norm": 0.33957986792953665, | |
| "learning_rate": 2.0749130490198652e-07, | |
| "loss": 1.0393140316009521, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.610071942446043, | |
| "grad_norm": 0.34246531637554406, | |
| "learning_rate": 2.046017287745554e-07, | |
| "loss": 1.0560503005981445, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.6129496402877699, | |
| "grad_norm": 0.33305609974874595, | |
| "learning_rate": 2.0173011985744603e-07, | |
| "loss": 0.9776498079299927, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.6158273381294963, | |
| "grad_norm": 0.33804380816718965, | |
| "learning_rate": 1.9887654301728286e-07, | |
| "loss": 0.9953919053077698, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.6187050359712232, | |
| "grad_norm": 0.33510637626675815, | |
| "learning_rate": 1.960410627133654e-07, | |
| "loss": 0.9703421592712402, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.6215827338129496, | |
| "grad_norm": 0.3528056926245532, | |
| "learning_rate": 1.9322374299621157e-07, | |
| "loss": 0.986659824848175, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.6244604316546762, | |
| "grad_norm": 0.3190776044982604, | |
| "learning_rate": 1.9042464750610987e-07, | |
| "loss": 1.047306776046753, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.6273381294964029, | |
| "grad_norm": 0.32915934915976663, | |
| "learning_rate": 1.8764383947168383e-07, | |
| "loss": 0.9910968542098999, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.6302158273381295, | |
| "grad_norm": 0.2786176626211202, | |
| "learning_rate": 1.8488138170846146e-07, | |
| "loss": 0.9353040456771851, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.6330935251798562, | |
| "grad_norm": 0.31795714142791043, | |
| "learning_rate": 1.8213733661745855e-07, | |
| "loss": 0.9244099259376526, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.6359712230215826, | |
| "grad_norm": 0.313138188279038, | |
| "learning_rate": 1.7941176618376686e-07, | |
| "loss": 0.9275581240653992, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.6388489208633095, | |
| "grad_norm": 0.2983504345513285, | |
| "learning_rate": 1.767047319751559e-07, | |
| "loss": 1.0571556091308594, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.641726618705036, | |
| "grad_norm": 0.2900247015191147, | |
| "learning_rate": 1.7401629514068116e-07, | |
| "loss": 0.8571426868438721, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.6446043165467625, | |
| "grad_norm": 0.32073923970300267, | |
| "learning_rate": 1.713465164093031e-07, | |
| "loss": 0.962169349193573, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.6474820143884892, | |
| "grad_norm": 0.3426036390922273, | |
| "learning_rate": 1.6869545608851465e-07, | |
| "loss": 1.068155288696289, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.6503597122302158, | |
| "grad_norm": 0.2912198669008646, | |
| "learning_rate": 1.6606317406298044e-07, | |
| "loss": 0.9102658033370972, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.6532374100719425, | |
| "grad_norm": 0.2783664324123141, | |
| "learning_rate": 1.6344972979318227e-07, | |
| "loss": 0.863929033279419, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.6561151079136691, | |
| "grad_norm": 0.3166046506897087, | |
| "learning_rate": 1.608551823140778e-07, | |
| "loss": 0.9644492268562317, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.6589928057553958, | |
| "grad_norm": 0.3032692441409985, | |
| "learning_rate": 1.5827959023376503e-07, | |
| "loss": 0.9762970209121704, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.6618705035971222, | |
| "grad_norm": 0.3016744079808176, | |
| "learning_rate": 1.5572301173216018e-07, | |
| "loss": 0.9287898540496826, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.664748201438849, | |
| "grad_norm": 0.27431423825024975, | |
| "learning_rate": 1.5318550455968282e-07, | |
| "loss": 0.9573485851287842, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.6676258992805755, | |
| "grad_norm": 0.3087425552754159, | |
| "learning_rate": 1.5066712603595045e-07, | |
| "loss": 0.9209293127059937, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.6705035971223021, | |
| "grad_norm": 0.2899753485027158, | |
| "learning_rate": 1.481679330484854e-07, | |
| "loss": 0.8685443997383118, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.6733812949640288, | |
| "grad_norm": 0.30804307167476375, | |
| "learning_rate": 1.4568798205142818e-07, | |
| "loss": 1.0030007362365723, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.6762589928057554, | |
| "grad_norm": 0.3269639712901895, | |
| "learning_rate": 1.4322732906426361e-07, | |
| "loss": 0.9561444520950317, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.679136690647482, | |
| "grad_norm": 0.24889504344208443, | |
| "learning_rate": 1.407860296705542e-07, | |
| "loss": 0.9087004661560059, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.6820143884892085, | |
| "grad_norm": 0.3206254281376367, | |
| "learning_rate": 1.3836413901668563e-07, | |
| "loss": 1.040391206741333, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.6848920863309353, | |
| "grad_norm": 0.35576899441592297, | |
| "learning_rate": 1.359617118106202e-07, | |
| "loss": 0.9220665097236633, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.6877697841726618, | |
| "grad_norm": 0.33992161289171674, | |
| "learning_rate": 1.3357880232066188e-07, | |
| "loss": 1.0048316717147827, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.6906474820143886, | |
| "grad_norm": 0.3155381507693763, | |
| "learning_rate": 1.3121546437422915e-07, | |
| "loss": 0.9235495924949646, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.693525179856115, | |
| "grad_norm": 0.2712504024495115, | |
| "learning_rate": 1.2887175135664085e-07, | |
| "loss": 0.9516848921775818, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.6964028776978417, | |
| "grad_norm": 0.2729229399526208, | |
| "learning_rate": 1.2654771620990845e-07, | |
| "loss": 0.8648025989532471, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.6992805755395683, | |
| "grad_norm": 0.28643485819645237, | |
| "learning_rate": 1.242434114315417e-07, | |
| "loss": 0.9381082057952881, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.702158273381295, | |
| "grad_norm": 0.3583701644093793, | |
| "learning_rate": 1.219588890733616e-07, | |
| "loss": 0.9976767301559448, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.7050359712230216, | |
| "grad_norm": 0.3058650158564105, | |
| "learning_rate": 1.1969420074032532e-07, | |
| "loss": 0.9010272026062012, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.707913669064748, | |
| "grad_norm": 0.3079535255773853, | |
| "learning_rate": 1.1744939758936045e-07, | |
| "loss": 0.9119488000869751, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.710791366906475, | |
| "grad_norm": 0.3112816376998103, | |
| "learning_rate": 1.1522453032820867e-07, | |
| "loss": 1.0138704776763916, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.7136690647482014, | |
| "grad_norm": 0.37249689212589393, | |
| "learning_rate": 1.1301964921428164e-07, | |
| "loss": 1.0309240818023682, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.7165467625899282, | |
| "grad_norm": 0.2919396976165667, | |
| "learning_rate": 1.1083480405352419e-07, | |
| "loss": 1.02201509475708, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.7194244604316546, | |
| "grad_norm": 0.28931756763377436, | |
| "learning_rate": 1.086700441992906e-07, | |
| "loss": 0.9084526896476746, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.7223021582733813, | |
| "grad_norm": 0.29523211564831336, | |
| "learning_rate": 1.0652541855122888e-07, | |
| "loss": 1.0073999166488647, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.725179856115108, | |
| "grad_norm": 0.3072460132576141, | |
| "learning_rate": 1.044009755541766e-07, | |
| "loss": 0.9768160581588745, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.7280575539568346, | |
| "grad_norm": 0.3021099960297105, | |
| "learning_rate": 1.0229676319706671e-07, | |
| "loss": 1.0317999124526978, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.7309352517985612, | |
| "grad_norm": 0.32444961570494624, | |
| "learning_rate": 1.0021282901184314e-07, | |
| "loss": 0.953796923160553, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.7338129496402876, | |
| "grad_norm": 0.31997858537370105, | |
| "learning_rate": 9.814922007238691e-08, | |
| "loss": 0.8879704475402832, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.7366906474820145, | |
| "grad_norm": 0.3001763106646087, | |
| "learning_rate": 9.610598299345363e-08, | |
| "loss": 0.9384487271308899, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.739568345323741, | |
| "grad_norm": 0.3076940778037735, | |
| "learning_rate": 9.408316392961946e-08, | |
| "loss": 0.8896828889846802, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.7424460431654676, | |
| "grad_norm": 0.30206497856260484, | |
| "learning_rate": 9.208080857423983e-08, | |
| "loss": 0.991470456123352, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.7453237410071942, | |
| "grad_norm": 0.31112092266827757, | |
| "learning_rate": 9.009896215841561e-08, | |
| "loss": 0.9205191135406494, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.7482014388489209, | |
| "grad_norm": 0.34267209965236567, | |
| "learning_rate": 8.813766944997292e-08, | |
| "loss": 0.9923685789108276, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.7510791366906475, | |
| "grad_norm": 0.2775856465253116, | |
| "learning_rate": 8.619697475245135e-08, | |
| "loss": 0.906508207321167, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.753956834532374, | |
| "grad_norm": 0.3695265847423868, | |
| "learning_rate": 8.427692190410252e-08, | |
| "loss": 1.1132643222808838, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.7568345323741008, | |
| "grad_norm": 0.335951419865602, | |
| "learning_rate": 8.237755427690097e-08, | |
| "loss": 0.958720326423645, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.7597122302158272, | |
| "grad_norm": 0.26398237511432854, | |
| "learning_rate": 8.049891477556325e-08, | |
| "loss": 0.8326461315155029, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.762589928057554, | |
| "grad_norm": 0.29113012627846874, | |
| "learning_rate": 7.864104583657994e-08, | |
| "loss": 0.92642742395401, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.7654676258992805, | |
| "grad_norm": 0.32510114636650206, | |
| "learning_rate": 7.680398942725607e-08, | |
| "loss": 1.0428296327590942, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.7683453237410072, | |
| "grad_norm": 0.3401761886813798, | |
| "learning_rate": 7.498778704476372e-08, | |
| "loss": 1.0307958126068115, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.7712230215827338, | |
| "grad_norm": 0.31568033622101, | |
| "learning_rate": 7.319247971520426e-08, | |
| "loss": 0.922683835029602, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.7741007194244605, | |
| "grad_norm": 0.2903936993592098, | |
| "learning_rate": 7.141810799268222e-08, | |
| "loss": 0.8616385459899902, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.776978417266187, | |
| "grad_norm": 0.28996044598452053, | |
| "learning_rate": 6.966471195838807e-08, | |
| "loss": 0.9075828194618225, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.7798561151079135, | |
| "grad_norm": 0.30488859521834044, | |
| "learning_rate": 6.793233121969422e-08, | |
| "loss": 0.9607424736022949, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.7827338129496404, | |
| "grad_norm": 0.29464217851633684, | |
| "learning_rate": 6.622100490925919e-08, | |
| "loss": 0.9187620878219604, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.7856115107913668, | |
| "grad_norm": 0.316636600630024, | |
| "learning_rate": 6.453077168414455e-08, | |
| "loss": 0.9384863972663879, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.7884892086330937, | |
| "grad_norm": 0.3042112992195363, | |
| "learning_rate": 6.286166972494079e-08, | |
| "loss": 0.9122720956802368, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.79136690647482, | |
| "grad_norm": 0.2799449479478083, | |
| "learning_rate": 6.121373673490548e-08, | |
| "loss": 0.9125893712043762, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.7942446043165468, | |
| "grad_norm": 0.287767507487325, | |
| "learning_rate": 5.958700993911192e-08, | |
| "loss": 0.8173254132270813, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.7971223021582734, | |
| "grad_norm": 0.2716254786056972, | |
| "learning_rate": 5.798152608360696e-08, | |
| "loss": 0.9122398495674133, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.31952149094868726, | |
| "learning_rate": 5.6397321434582534e-08, | |
| "loss": 1.0111041069030762, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.8028776978417267, | |
| "grad_norm": 0.27995436477971375, | |
| "learning_rate": 5.483443177755498e-08, | |
| "loss": 0.9707604646682739, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.8057553956834531, | |
| "grad_norm": 0.3243898181487502, | |
| "learning_rate": 5.32928924165581e-08, | |
| "loss": 1.0739054679870605, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.80863309352518, | |
| "grad_norm": 0.2984254760085138, | |
| "learning_rate": 5.177273817334438e-08, | |
| "loss": 0.9249017238616943, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.8115107913669064, | |
| "grad_norm": 0.3146364757154643, | |
| "learning_rate": 5.027400338659926e-08, | |
| "loss": 0.9324784278869629, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.814388489208633, | |
| "grad_norm": 0.2942034665756954, | |
| "learning_rate": 4.879672191116524e-08, | |
| "loss": 0.92160564661026, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.8172661870503597, | |
| "grad_norm": 0.3093230418919872, | |
| "learning_rate": 4.7340927117277105e-08, | |
| "loss": 0.9626412987709045, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.8201438848920863, | |
| "grad_norm": 0.2737771051650981, | |
| "learning_rate": 4.590665188980769e-08, | |
| "loss": 0.9344724416732788, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.823021582733813, | |
| "grad_norm": 0.2868842184615828, | |
| "learning_rate": 4.44939286275261e-08, | |
| "loss": 0.969592809677124, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.8258992805755394, | |
| "grad_norm": 0.30108362908099795, | |
| "learning_rate": 4.310278924236454e-08, | |
| "loss": 0.9268302917480469, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.8287769784172663, | |
| "grad_norm": 0.3038411300603624, | |
| "learning_rate": 4.173326515869879e-08, | |
| "loss": 0.9940468072891235, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.8316546762589927, | |
| "grad_norm": 0.30517693172745697, | |
| "learning_rate": 4.038538731263719e-08, | |
| "loss": 0.9976387023925781, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.8345323741007196, | |
| "grad_norm": 0.3273494194690698, | |
| "learning_rate": 3.9059186151322534e-08, | |
| "loss": 1.05000638961792, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.837410071942446, | |
| "grad_norm": 0.29782336017676786, | |
| "learning_rate": 3.775469163224432e-08, | |
| "loss": 0.9021062850952148, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.8402877697841726, | |
| "grad_norm": 0.2796086934847699, | |
| "learning_rate": 3.647193322256137e-08, | |
| "loss": 0.8977291584014893, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.8431654676258993, | |
| "grad_norm": 0.32101134920342667, | |
| "learning_rate": 3.5210939898437154e-08, | |
| "loss": 0.9850113391876221, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.846043165467626, | |
| "grad_norm": 0.3578812552257961, | |
| "learning_rate": 3.397174014438431e-08, | |
| "loss": 0.9935275316238403, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.8489208633093526, | |
| "grad_norm": 0.3018396148934059, | |
| "learning_rate": 3.275436195262193e-08, | |
| "loss": 0.9289500713348389, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.851798561151079, | |
| "grad_norm": 0.2833335903840955, | |
| "learning_rate": 3.155883282244287e-08, | |
| "loss": 0.8204896450042725, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.8546762589928059, | |
| "grad_norm": 0.3430760638933196, | |
| "learning_rate": 3.038517975959276e-08, | |
| "loss": 1.0271613597869873, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.8575539568345323, | |
| "grad_norm": 0.3085433261480597, | |
| "learning_rate": 2.923342927565964e-08, | |
| "loss": 0.9084464311599731, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.8604316546762591, | |
| "grad_norm": 0.30704980866497883, | |
| "learning_rate": 2.8103607387475746e-08, | |
| "loss": 0.9712929725646973, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.8633093525179856, | |
| "grad_norm": 0.28312145339260103, | |
| "learning_rate": 2.69957396165289e-08, | |
| "loss": 0.9905073642730713, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.8661870503597122, | |
| "grad_norm": 0.2770551368568327, | |
| "learning_rate": 2.5909850988386937e-08, | |
| "loss": 0.9023991227149963, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.8690647482014389, | |
| "grad_norm": 0.31953951865146846, | |
| "learning_rate": 2.4845966032131628e-08, | |
| "loss": 1.0285024642944336, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.8719424460431655, | |
| "grad_norm": 0.3452314742008196, | |
| "learning_rate": 2.380410877980532e-08, | |
| "loss": 1.004025936126709, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.8748201438848922, | |
| "grad_norm": 0.33870243092263896, | |
| "learning_rate": 2.278430276586729e-08, | |
| "loss": 0.9524623155593872, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.8776978417266186, | |
| "grad_norm": 0.32350246830260215, | |
| "learning_rate": 2.1786571026662702e-08, | |
| "loss": 1.0455811023712158, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.8805755395683454, | |
| "grad_norm": 0.3328778935315255, | |
| "learning_rate": 2.0810936099902364e-08, | |
| "loss": 0.9919840693473816, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.8834532374100719, | |
| "grad_norm": 0.29952519081119605, | |
| "learning_rate": 1.9857420024152806e-08, | |
| "loss": 0.9986833930015564, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.8863309352517985, | |
| "grad_norm": 0.33793774006407235, | |
| "learning_rate": 1.892604433833933e-08, | |
| "loss": 0.931689441204071, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.8892086330935252, | |
| "grad_norm": 0.30901405969162027, | |
| "learning_rate": 1.8016830081259094e-08, | |
| "loss": 0.9114639759063721, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.8920863309352518, | |
| "grad_norm": 0.32095691544451443, | |
| "learning_rate": 1.712979779110568e-08, | |
| "loss": 1.0065832138061523, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.8949640287769784, | |
| "grad_norm": 0.30923568734316464, | |
| "learning_rate": 1.6264967505005612e-08, | |
| "loss": 0.9409237504005432, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.8978417266187049, | |
| "grad_norm": 0.31465289359066045, | |
| "learning_rate": 1.5422358758565344e-08, | |
| "loss": 0.9561393857002258, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.9007194244604317, | |
| "grad_norm": 0.2782146122603473, | |
| "learning_rate": 1.4601990585430212e-08, | |
| "loss": 0.8838869333267212, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.9035971223021582, | |
| "grad_norm": 0.3476179534958916, | |
| "learning_rate": 1.380388151685441e-08, | |
| "loss": 1.1335151195526123, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.906474820143885, | |
| "grad_norm": 0.30248705756655914, | |
| "learning_rate": 1.302804958128223e-08, | |
| "loss": 0.9658051133155823, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.9093525179856115, | |
| "grad_norm": 0.30985357673524433, | |
| "learning_rate": 1.2274512303941164e-08, | |
| "loss": 0.8988052606582642, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.912230215827338, | |
| "grad_norm": 0.3105920840120309, | |
| "learning_rate": 1.1543286706445553e-08, | |
| "loss": 0.9807320237159729, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.9151079136690647, | |
| "grad_norm": 0.30776301282911345, | |
| "learning_rate": 1.0834389306412673e-08, | |
| "loss": 0.9672824144363403, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.9179856115107914, | |
| "grad_norm": 0.3355211414551835, | |
| "learning_rate": 1.0147836117088915e-08, | |
| "loss": 0.9862767457962036, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.920863309352518, | |
| "grad_norm": 0.29700692569905607, | |
| "learning_rate": 9.483642646988977e-09, | |
| "loss": 0.9207549095153809, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.9237410071942445, | |
| "grad_norm": 0.32529111277227224, | |
| "learning_rate": 8.841823899544577e-09, | |
| "loss": 1.05051851272583, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.9266187050359713, | |
| "grad_norm": 0.3097839939998225, | |
| "learning_rate": 8.222394372766173e-09, | |
| "loss": 0.9589816927909851, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.9294964028776977, | |
| "grad_norm": 0.30754678215336145, | |
| "learning_rate": 7.625368058915226e-09, | |
| "loss": 0.9071527719497681, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.9323741007194246, | |
| "grad_norm": 0.28515154254996145, | |
| "learning_rate": 7.05075844418812e-09, | |
| "loss": 0.98401939868927, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.935251798561151, | |
| "grad_norm": 0.32501900842828985, | |
| "learning_rate": 6.498578508411734e-09, | |
| "loss": 0.9431239366531372, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.9381294964028777, | |
| "grad_norm": 0.2997493450074929, | |
| "learning_rate": 5.9688407247500124e-09, | |
| "loss": 0.8510617017745972, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.9410071942446043, | |
| "grad_norm": 0.2931086066866291, | |
| "learning_rate": 5.461557059422306e-09, | |
| "loss": 0.9289333820343018, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.943884892086331, | |
| "grad_norm": 0.3450018212922675, | |
| "learning_rate": 4.9767389714330256e-09, | |
| "loss": 0.988121509552002, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.9467625899280576, | |
| "grad_norm": 0.3140422887654555, | |
| "learning_rate": 4.514397412312965e-09, | |
| "loss": 1.0198135375976562, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.949640287769784, | |
| "grad_norm": 0.2885512942736773, | |
| "learning_rate": 4.074542825871275e-09, | |
| "loss": 0.8961633443832397, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.952517985611511, | |
| "grad_norm": 0.35409915766999916, | |
| "learning_rate": 3.657185147960762e-09, | |
| "loss": 1.0393238067626953, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.9553956834532373, | |
| "grad_norm": 0.30601728285109125, | |
| "learning_rate": 3.2623338062522933e-09, | |
| "loss": 0.9444550275802612, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.958273381294964, | |
| "grad_norm": 0.2924289258676955, | |
| "learning_rate": 2.889997720022297e-09, | |
| "loss": 0.9215587377548218, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.9611510791366906, | |
| "grad_norm": 0.30327925646552367, | |
| "learning_rate": 2.5401852999512586e-09, | |
| "loss": 1.000258445739746, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.9640287769784173, | |
| "grad_norm": 0.29260027512430775, | |
| "learning_rate": 2.212904447933983e-09, | |
| "loss": 0.9252768754959106, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.966906474820144, | |
| "grad_norm": 0.3011559234991841, | |
| "learning_rate": 1.908162556900628e-09, | |
| "loss": 0.9935536980628967, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.9697841726618706, | |
| "grad_norm": 0.27115587245365835, | |
| "learning_rate": 1.6259665106498344e-09, | |
| "loss": 0.9564770460128784, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.9726618705035972, | |
| "grad_norm": 0.27403209801565, | |
| "learning_rate": 1.3663226836936326e-09, | |
| "loss": 0.8805955052375793, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.9755395683453236, | |
| "grad_norm": 0.3044315007410778, | |
| "learning_rate": 1.1292369411127766e-09, | |
| "loss": 0.8896344304084778, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.9784172661870505, | |
| "grad_norm": 0.30681720361851966, | |
| "learning_rate": 9.147146384250737e-10, | |
| "loss": 0.980034351348877, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.981294964028777, | |
| "grad_norm": 0.32187194306044253, | |
| "learning_rate": 7.227606214635917e-10, | |
| "loss": 0.9895438551902771, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.9841726618705036, | |
| "grad_norm": 0.3136795801610112, | |
| "learning_rate": 5.533792262675252e-10, | |
| "loss": 0.9838018417358398, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.9870503597122302, | |
| "grad_norm": 0.29300997638864673, | |
| "learning_rate": 4.0657427898460603e-10, | |
| "loss": 0.8921380043029785, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.9899280575539569, | |
| "grad_norm": 0.2711131667782687, | |
| "learning_rate": 2.8234909578417344e-10, | |
| "loss": 0.8311777114868164, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.9928057553956835, | |
| "grad_norm": 0.3390732741445483, | |
| "learning_rate": 1.8070648278234457e-10, | |
| "loss": 1.0223444700241089, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.99568345323741, | |
| "grad_norm": 0.27722007418335426, | |
| "learning_rate": 1.0164873597895419e-10, | |
| "loss": 0.908263087272644, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.9985611510791368, | |
| "grad_norm": 0.32715406813626025, | |
| "learning_rate": 4.5177641205262906e-11, | |
| "loss": 0.950904369354248, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.45329927875611203, | |
| "learning_rate": 1.1294474083878292e-11, | |
| "loss": 1.1292500495910645, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 696, | |
| "total_flos": 1203702673702912.0, | |
| "train_loss": 1.0133290295114463, | |
| "train_runtime": 7255.9911, | |
| "train_samples_per_second": 0.766, | |
| "train_steps_per_second": 0.096 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 696, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1203702673702912.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |