Instructions to use wonwonn/agent_random_adapter with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use wonwonn/agent_random_adapter with PEFT:
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
model = PeftModel.from_pretrained(base_model, "wonwonn/agent_random_adapter")
- Transformers
How to use wonwonn/agent_random_adapter with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="wonwonn/agent_random_adapter")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("wonwonn/agent_random_adapter", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use wonwonn/agent_random_adapter with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "wonwonn/agent_random_adapter"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "wonwonn/agent_random_adapter",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker
docker model run hf.co/wonwonn/agent_random_adapter
- SGLang
How to use wonwonn/agent_random_adapter with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "wonwonn/agent_random_adapter" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "wonwonn/agent_random_adapter",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "wonwonn/agent_random_adapter" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "wonwonn/agent_random_adapter",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
- Docker Model Runner
How to use wonwonn/agent_random_adapter with Docker Model Runner:
docker model run hf.co/wonwonn/agent_random_adapter
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 1662, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0012040939193257074, | |
| "grad_norm": 1.0888201950123075, | |
| "learning_rate": 0.0, | |
| "loss": 1.2516779899597168, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.002408187838651415, | |
| "grad_norm": 1.3504171031717647, | |
| "learning_rate": 1.1904761904761904e-06, | |
| "loss": 1.292458415031433, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.003612281757977122, | |
| "grad_norm": 1.3295966510635613, | |
| "learning_rate": 2.3809523809523808e-06, | |
| "loss": 1.2252806425094604, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.00481637567730283, | |
| "grad_norm": 1.2049241506681116, | |
| "learning_rate": 3.5714285714285714e-06, | |
| "loss": 1.1460682153701782, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.006020469596628537, | |
| "grad_norm": 1.3299344704725482, | |
| "learning_rate": 4.7619047619047615e-06, | |
| "loss": 1.3050752878189087, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.007224563515954244, | |
| "grad_norm": 1.1937833887885951, | |
| "learning_rate": 5.9523809523809525e-06, | |
| "loss": 1.1561931371688843, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.008428657435279952, | |
| "grad_norm": 1.1972161646097195, | |
| "learning_rate": 7.142857142857143e-06, | |
| "loss": 1.348860502243042, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.00963275135460566, | |
| "grad_norm": 1.1599119044275834, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 1.1653285026550293, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.010836845273931367, | |
| "grad_norm": 1.168880883596748, | |
| "learning_rate": 9.523809523809523e-06, | |
| "loss": 1.2427582740783691, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.012040939193257074, | |
| "grad_norm": 1.1907331737748812, | |
| "learning_rate": 1.0714285714285714e-05, | |
| "loss": 1.1958000659942627, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.013245033112582781, | |
| "grad_norm": 1.0764330357862444, | |
| "learning_rate": 1.1904761904761905e-05, | |
| "loss": 1.0088632106781006, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.014449127031908489, | |
| "grad_norm": 0.970590293533405, | |
| "learning_rate": 1.3095238095238096e-05, | |
| "loss": 1.057112455368042, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.015653220951234198, | |
| "grad_norm": 0.9117577162508362, | |
| "learning_rate": 1.4285714285714285e-05, | |
| "loss": 1.093891978263855, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.016857314870559904, | |
| "grad_norm": 0.7128858566790944, | |
| "learning_rate": 1.5476190476190476e-05, | |
| "loss": 0.9809948205947876, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.018061408789885613, | |
| "grad_norm": 0.7466873900766856, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 1.1132748126983643, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.01926550270921132, | |
| "grad_norm": 0.758857309279139, | |
| "learning_rate": 1.785714285714286e-05, | |
| "loss": 0.9380471706390381, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.020469596628537028, | |
| "grad_norm": 0.6736348254664769, | |
| "learning_rate": 1.9047619047619046e-05, | |
| "loss": 1.1095030307769775, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.021673690547862733, | |
| "grad_norm": 0.675335552277212, | |
| "learning_rate": 2.023809523809524e-05, | |
| "loss": 1.0808950662612915, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.022877784467188442, | |
| "grad_norm": 0.7264824733143365, | |
| "learning_rate": 2.1428571428571428e-05, | |
| "loss": 0.9620448350906372, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.024081878386514148, | |
| "grad_norm": 0.7051338547341263, | |
| "learning_rate": 2.261904761904762e-05, | |
| "loss": 0.9811359643936157, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.025285972305839857, | |
| "grad_norm": 0.7843003159004286, | |
| "learning_rate": 2.380952380952381e-05, | |
| "loss": 1.0925147533416748, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.026490066225165563, | |
| "grad_norm": 0.7516975231625368, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.9252520799636841, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.027694160144491272, | |
| "grad_norm": 0.6962403835861913, | |
| "learning_rate": 2.6190476190476192e-05, | |
| "loss": 1.0134961605072021, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.028898254063816978, | |
| "grad_norm": 0.7671373768051949, | |
| "learning_rate": 2.7380952380952383e-05, | |
| "loss": 1.0397887229919434, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.030102347983142687, | |
| "grad_norm": 0.7085458224295194, | |
| "learning_rate": 2.857142857142857e-05, | |
| "loss": 1.0200514793395996, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.031306441902468396, | |
| "grad_norm": 0.6208311360775705, | |
| "learning_rate": 2.9761904761904762e-05, | |
| "loss": 0.8722552061080933, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.0325105358217941, | |
| "grad_norm": 0.7245939746288178, | |
| "learning_rate": 3.095238095238095e-05, | |
| "loss": 1.0895636081695557, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.03371462974111981, | |
| "grad_norm": 0.7155183357450243, | |
| "learning_rate": 3.2142857142857144e-05, | |
| "loss": 0.9978558421134949, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.034918723660445516, | |
| "grad_norm": 0.7124130674118848, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 1.0731256008148193, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.036122817579771226, | |
| "grad_norm": 0.6936933307418388, | |
| "learning_rate": 3.4523809523809526e-05, | |
| "loss": 1.0017905235290527, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03732691149909693, | |
| "grad_norm": 0.6914756992106068, | |
| "learning_rate": 3.571428571428572e-05, | |
| "loss": 0.9039508700370789, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.03853100541842264, | |
| "grad_norm": 0.7530165617199812, | |
| "learning_rate": 3.690476190476191e-05, | |
| "loss": 1.018087387084961, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.039735099337748346, | |
| "grad_norm": 0.7253115434648741, | |
| "learning_rate": 3.809523809523809e-05, | |
| "loss": 0.9909164309501648, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.040939193257074055, | |
| "grad_norm": 0.6415639959381287, | |
| "learning_rate": 3.928571428571429e-05, | |
| "loss": 0.9909334182739258, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.04214328717639976, | |
| "grad_norm": 0.6628002571565351, | |
| "learning_rate": 4.047619047619048e-05, | |
| "loss": 0.8408276438713074, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.04334738109572547, | |
| "grad_norm": 0.6544680745811386, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.9553419947624207, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.044551475015051176, | |
| "grad_norm": 0.7501400971456818, | |
| "learning_rate": 4.2857142857142856e-05, | |
| "loss": 0.991558313369751, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.045755568934376885, | |
| "grad_norm": 0.6299802150920484, | |
| "learning_rate": 4.404761904761905e-05, | |
| "loss": 0.880527138710022, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.04695966285370259, | |
| "grad_norm": 0.7119587435770088, | |
| "learning_rate": 4.523809523809524e-05, | |
| "loss": 1.084947109222412, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.048163756773028296, | |
| "grad_norm": 0.6210760615390281, | |
| "learning_rate": 4.642857142857143e-05, | |
| "loss": 0.8718366622924805, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.049367850692354005, | |
| "grad_norm": 0.6905357174119584, | |
| "learning_rate": 4.761904761904762e-05, | |
| "loss": 1.030173897743225, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.050571944611679714, | |
| "grad_norm": 0.6604045749465294, | |
| "learning_rate": 4.880952380952381e-05, | |
| "loss": 0.8225071430206299, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.05177603853100542, | |
| "grad_norm": 0.7288884168617961, | |
| "learning_rate": 5e-05, | |
| "loss": 0.8640418648719788, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.052980132450331126, | |
| "grad_norm": 0.7530293725220191, | |
| "learning_rate": 5.119047619047619e-05, | |
| "loss": 1.053181529045105, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.054184226369656835, | |
| "grad_norm": 0.633690102398682, | |
| "learning_rate": 5.2380952380952384e-05, | |
| "loss": 0.9794737100601196, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.055388320288982544, | |
| "grad_norm": 0.6366432445096702, | |
| "learning_rate": 5.3571428571428575e-05, | |
| "loss": 0.928667426109314, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.056592414208308246, | |
| "grad_norm": 0.6635606705213237, | |
| "learning_rate": 5.4761904761904766e-05, | |
| "loss": 0.8890948295593262, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.057796508127633955, | |
| "grad_norm": 0.717685283959276, | |
| "learning_rate": 5.595238095238096e-05, | |
| "loss": 0.9435674548149109, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.059000602046959665, | |
| "grad_norm": 0.672785636669423, | |
| "learning_rate": 5.714285714285714e-05, | |
| "loss": 0.8677853345870972, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.060204695966285374, | |
| "grad_norm": 0.6736728035828868, | |
| "learning_rate": 5.833333333333334e-05, | |
| "loss": 0.904883861541748, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.061408789885611076, | |
| "grad_norm": 0.6832027265384513, | |
| "learning_rate": 5.9523809523809524e-05, | |
| "loss": 0.8402994871139526, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.06261288380493679, | |
| "grad_norm": 0.6333608523885448, | |
| "learning_rate": 6.0714285714285715e-05, | |
| "loss": 0.8128600716590881, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.0638169777242625, | |
| "grad_norm": 0.6511799389683816, | |
| "learning_rate": 6.19047619047619e-05, | |
| "loss": 0.9272807836532593, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.0650210716435882, | |
| "grad_norm": 0.7660958513490825, | |
| "learning_rate": 6.30952380952381e-05, | |
| "loss": 0.9557014107704163, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.06622516556291391, | |
| "grad_norm": 0.6336813527780781, | |
| "learning_rate": 6.428571428571429e-05, | |
| "loss": 0.9378836154937744, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.06742925948223961, | |
| "grad_norm": 0.7508486475685524, | |
| "learning_rate": 6.547619047619048e-05, | |
| "loss": 0.8787630796432495, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.06863335340156532, | |
| "grad_norm": 0.84517602222297, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 0.8722257018089294, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.06983744732089103, | |
| "grad_norm": 0.7596529781054532, | |
| "learning_rate": 6.785714285714286e-05, | |
| "loss": 0.8949298858642578, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.07104154124021674, | |
| "grad_norm": 0.6603054780063723, | |
| "learning_rate": 6.904761904761905e-05, | |
| "loss": 0.91631019115448, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.07224563515954245, | |
| "grad_norm": 0.6587245162237584, | |
| "learning_rate": 7.023809523809524e-05, | |
| "loss": 0.8479186296463013, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.07344972907886815, | |
| "grad_norm": 0.5896173965743856, | |
| "learning_rate": 7.142857142857143e-05, | |
| "loss": 0.8625457286834717, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.07465382299819386, | |
| "grad_norm": 0.656853432206036, | |
| "learning_rate": 7.261904761904762e-05, | |
| "loss": 0.7943159937858582, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.07585791691751957, | |
| "grad_norm": 0.7173445966189417, | |
| "learning_rate": 7.380952380952382e-05, | |
| "loss": 0.8757072687149048, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.07706201083684527, | |
| "grad_norm": 0.741123976756676, | |
| "learning_rate": 7.500000000000001e-05, | |
| "loss": 0.9337731003761292, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.07826610475617098, | |
| "grad_norm": 0.6587435716311675, | |
| "learning_rate": 7.619047619047618e-05, | |
| "loss": 0.9480957984924316, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.07947019867549669, | |
| "grad_norm": 0.7923118352415254, | |
| "learning_rate": 7.738095238095239e-05, | |
| "loss": 0.9424346089363098, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.0806742925948224, | |
| "grad_norm": 0.7437650621360334, | |
| "learning_rate": 7.857142857142858e-05, | |
| "loss": 0.9045975208282471, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.08187838651414811, | |
| "grad_norm": 0.7969199409769604, | |
| "learning_rate": 7.976190476190477e-05, | |
| "loss": 0.9725304245948792, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.08308248043347381, | |
| "grad_norm": 0.7206589585684046, | |
| "learning_rate": 8.095238095238096e-05, | |
| "loss": 0.9252752661705017, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.08428657435279951, | |
| "grad_norm": 0.8407014467727374, | |
| "learning_rate": 8.214285714285714e-05, | |
| "loss": 0.8761662244796753, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.08549066827212523, | |
| "grad_norm": 0.6884460334379702, | |
| "learning_rate": 8.333333333333334e-05, | |
| "loss": 0.8926453590393066, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.08669476219145093, | |
| "grad_norm": 0.7572421558690815, | |
| "learning_rate": 8.452380952380952e-05, | |
| "loss": 1.014685869216919, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.08789885611077664, | |
| "grad_norm": 0.7214896773064038, | |
| "learning_rate": 8.571428571428571e-05, | |
| "loss": 0.9112828969955444, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.08910295003010235, | |
| "grad_norm": 0.6504920809145635, | |
| "learning_rate": 8.690476190476192e-05, | |
| "loss": 0.877046525478363, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.09030704394942805, | |
| "grad_norm": 0.7130323126220806, | |
| "learning_rate": 8.80952380952381e-05, | |
| "loss": 0.988493025302887, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.09151113786875377, | |
| "grad_norm": 0.6759832506467568, | |
| "learning_rate": 8.92857142857143e-05, | |
| "loss": 0.8634282946586609, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.09271523178807947, | |
| "grad_norm": 0.7128524999186786, | |
| "learning_rate": 9.047619047619048e-05, | |
| "loss": 0.8843424320220947, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.09391932570740517, | |
| "grad_norm": 0.7527502094813464, | |
| "learning_rate": 9.166666666666667e-05, | |
| "loss": 0.9262052178382874, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.09512341962673089, | |
| "grad_norm": 0.679134529747516, | |
| "learning_rate": 9.285714285714286e-05, | |
| "loss": 0.902319610118866, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.09632751354605659, | |
| "grad_norm": 0.6582862889379956, | |
| "learning_rate": 9.404761904761905e-05, | |
| "loss": 0.9413686394691467, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.0975316074653823, | |
| "grad_norm": 0.6130866418518588, | |
| "learning_rate": 9.523809523809524e-05, | |
| "loss": 0.8581817150115967, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.09873570138470801, | |
| "grad_norm": 0.5687360865386506, | |
| "learning_rate": 9.642857142857143e-05, | |
| "loss": 0.873586893081665, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.09993979530403371, | |
| "grad_norm": 0.5975148331542051, | |
| "learning_rate": 9.761904761904762e-05, | |
| "loss": 0.8216943740844727, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.10114388922335943, | |
| "grad_norm": 0.5756637160773737, | |
| "learning_rate": 9.880952380952381e-05, | |
| "loss": 0.8205018639564514, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.10234798314268513, | |
| "grad_norm": 0.6452316625847548, | |
| "learning_rate": 0.0001, | |
| "loss": 0.9865813255310059, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.10355207706201083, | |
| "grad_norm": 0.6104006813254862, | |
| "learning_rate": 9.999990091096183e-05, | |
| "loss": 0.8462604284286499, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.10475617098133655, | |
| "grad_norm": 0.5499916520131942, | |
| "learning_rate": 9.999960364424007e-05, | |
| "loss": 0.8234490752220154, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.10596026490066225, | |
| "grad_norm": 0.5887689279983574, | |
| "learning_rate": 9.999910820101293e-05, | |
| "loss": 0.7696816921234131, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.10716435881998795, | |
| "grad_norm": 0.6295647282540509, | |
| "learning_rate": 9.999841458324413e-05, | |
| "loss": 0.9583300352096558, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.10836845273931367, | |
| "grad_norm": 0.6257819500551344, | |
| "learning_rate": 9.999752279368291e-05, | |
| "loss": 0.8089509010314941, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.10957254665863937, | |
| "grad_norm": 0.6853890534698811, | |
| "learning_rate": 9.999643283586388e-05, | |
| "loss": 0.7177246809005737, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.11077664057796509, | |
| "grad_norm": 0.5875425882208323, | |
| "learning_rate": 9.999514471410719e-05, | |
| "loss": 0.8235414028167725, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.11198073449729079, | |
| "grad_norm": 0.6657487103601663, | |
| "learning_rate": 9.999365843351838e-05, | |
| "loss": 0.878923773765564, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.11318482841661649, | |
| "grad_norm": 0.6312303761922631, | |
| "learning_rate": 9.999197399998841e-05, | |
| "loss": 0.9166843891143799, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.11438892233594221, | |
| "grad_norm": 0.6219532249650043, | |
| "learning_rate": 9.999009142019364e-05, | |
| "loss": 0.8822045922279358, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.11559301625526791, | |
| "grad_norm": 0.6714545958652653, | |
| "learning_rate": 9.99880107015958e-05, | |
| "loss": 0.8442031145095825, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.11679711017459361, | |
| "grad_norm": 0.6187610552353822, | |
| "learning_rate": 9.998573185244192e-05, | |
| "loss": 0.7852096557617188, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.11800120409391933, | |
| "grad_norm": 0.578079618701512, | |
| "learning_rate": 9.998325488176437e-05, | |
| "loss": 0.8714963793754578, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.11920529801324503, | |
| "grad_norm": 0.5345630777945766, | |
| "learning_rate": 9.99805797993808e-05, | |
| "loss": 0.8692537546157837, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.12040939193257075, | |
| "grad_norm": 0.6181542688226837, | |
| "learning_rate": 9.997770661589403e-05, | |
| "loss": 0.8973393440246582, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.12161348585189645, | |
| "grad_norm": 0.64002745884027, | |
| "learning_rate": 9.997463534269213e-05, | |
| "loss": 0.8967085480690002, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.12281757977122215, | |
| "grad_norm": 0.7341991040389423, | |
| "learning_rate": 9.997136599194825e-05, | |
| "loss": 0.8160005807876587, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.12402167369054787, | |
| "grad_norm": 0.6265972559860278, | |
| "learning_rate": 9.996789857662068e-05, | |
| "loss": 1.0133693218231201, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.12522576760987358, | |
| "grad_norm": 0.6957185688667177, | |
| "learning_rate": 9.996423311045273e-05, | |
| "loss": 0.8984023928642273, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.12642986152919927, | |
| "grad_norm": 0.5956498399306159, | |
| "learning_rate": 9.99603696079727e-05, | |
| "loss": 0.9195867776870728, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.127633955448525, | |
| "grad_norm": 0.6463966531285172, | |
| "learning_rate": 9.995630808449383e-05, | |
| "loss": 0.8191015720367432, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.1288380493678507, | |
| "grad_norm": 0.6171569033866292, | |
| "learning_rate": 9.99520485561142e-05, | |
| "loss": 0.9108123779296875, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.1300421432871764, | |
| "grad_norm": 0.6143974868615907, | |
| "learning_rate": 9.994759103971673e-05, | |
| "loss": 0.8896417617797852, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.1312462372065021, | |
| "grad_norm": 0.6149896368980597, | |
| "learning_rate": 9.994293555296904e-05, | |
| "loss": 0.8559168577194214, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.13245033112582782, | |
| "grad_norm": 0.6515351535933014, | |
| "learning_rate": 9.993808211432346e-05, | |
| "loss": 0.8588761687278748, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.1336544250451535, | |
| "grad_norm": 0.6676952442415878, | |
| "learning_rate": 9.993303074301689e-05, | |
| "loss": 0.8163575530052185, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.13485851896447923, | |
| "grad_norm": 0.6375806365685596, | |
| "learning_rate": 9.992778145907073e-05, | |
| "loss": 0.9100340604782104, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.13606261288380495, | |
| "grad_norm": 0.6252693886741836, | |
| "learning_rate": 9.992233428329088e-05, | |
| "loss": 0.8442620038986206, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.13726670680313063, | |
| "grad_norm": 0.7244683282798454, | |
| "learning_rate": 9.99166892372675e-05, | |
| "loss": 0.8886062502861023, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.13847080072245635, | |
| "grad_norm": 0.7464382496737153, | |
| "learning_rate": 9.991084634337511e-05, | |
| "loss": 0.8216350674629211, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.13967489464178207, | |
| "grad_norm": 0.646628205480326, | |
| "learning_rate": 9.99048056247724e-05, | |
| "loss": 0.9128209948539734, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.14087898856110775, | |
| "grad_norm": 0.5712623706680054, | |
| "learning_rate": 9.989856710540209e-05, | |
| "loss": 0.8703181743621826, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.14208308248043347, | |
| "grad_norm": 0.6329274208544833, | |
| "learning_rate": 9.989213080999097e-05, | |
| "loss": 0.8345284461975098, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.1432871763997592, | |
| "grad_norm": 0.62852450343761, | |
| "learning_rate": 9.988549676404965e-05, | |
| "loss": 0.8411127328872681, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.1444912703190849, | |
| "grad_norm": 0.6420543736879645, | |
| "learning_rate": 9.987866499387264e-05, | |
| "loss": 0.9257729649543762, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1456953642384106, | |
| "grad_norm": 0.5528124716336356, | |
| "learning_rate": 9.987163552653802e-05, | |
| "loss": 0.8293143510818481, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.1468994581577363, | |
| "grad_norm": 0.6033234224852082, | |
| "learning_rate": 9.986440838990755e-05, | |
| "loss": 0.8318862915039062, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.14810355207706202, | |
| "grad_norm": 0.5677902467857496, | |
| "learning_rate": 9.985698361262642e-05, | |
| "loss": 0.8512689471244812, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.1493076459963877, | |
| "grad_norm": 0.6052037467329195, | |
| "learning_rate": 9.984936122412319e-05, | |
| "loss": 0.8174145221710205, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.15051173991571343, | |
| "grad_norm": 0.6203272750504745, | |
| "learning_rate": 9.984154125460969e-05, | |
| "loss": 0.8040434718132019, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.15171583383503914, | |
| "grad_norm": 0.5640742447104545, | |
| "learning_rate": 9.983352373508081e-05, | |
| "loss": 0.8144879341125488, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.15291992775436483, | |
| "grad_norm": 0.7383291925432525, | |
| "learning_rate": 9.982530869731451e-05, | |
| "loss": 0.8918353915214539, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.15412402167369055, | |
| "grad_norm": 0.6110098020804609, | |
| "learning_rate": 9.98168961738716e-05, | |
| "loss": 0.8100703358650208, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.15532811559301626, | |
| "grad_norm": 0.7276368732153812, | |
| "learning_rate": 9.980828619809561e-05, | |
| "loss": 0.8626886010169983, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.15653220951234195, | |
| "grad_norm": 0.6182775041438267, | |
| "learning_rate": 9.979947880411273e-05, | |
| "loss": 0.7800024747848511, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.15773630343166767, | |
| "grad_norm": 0.6684016472398361, | |
| "learning_rate": 9.97904740268316e-05, | |
| "loss": 0.8336461782455444, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.15894039735099338, | |
| "grad_norm": 0.6781202957252627, | |
| "learning_rate": 9.978127190194322e-05, | |
| "loss": 0.9167442321777344, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.16014449127031907, | |
| "grad_norm": 0.5781488999510032, | |
| "learning_rate": 9.977187246592076e-05, | |
| "loss": 0.8317021727561951, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.1613485851896448, | |
| "grad_norm": 0.558442522325844, | |
| "learning_rate": 9.976227575601947e-05, | |
| "loss": 0.7598085999488831, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.1625526791089705, | |
| "grad_norm": 0.641636921915755, | |
| "learning_rate": 9.97524818102765e-05, | |
| "loss": 0.837604284286499, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.16375677302829622, | |
| "grad_norm": 0.6091929337726231, | |
| "learning_rate": 9.974249066751077e-05, | |
| "loss": 0.9500613212585449, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.1649608669476219, | |
| "grad_norm": 0.6466178033726994, | |
| "learning_rate": 9.973230236732276e-05, | |
| "loss": 0.771168053150177, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.16616496086694763, | |
| "grad_norm": 0.6660733398000859, | |
| "learning_rate": 9.972191695009446e-05, | |
| "loss": 0.7790799140930176, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.16736905478627334, | |
| "grad_norm": 0.6115980204086684, | |
| "learning_rate": 9.971133445698908e-05, | |
| "loss": 0.7652186155319214, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.16857314870559903, | |
| "grad_norm": 0.5413962093401579, | |
| "learning_rate": 9.9700554929951e-05, | |
| "loss": 0.7228227257728577, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.16977724262492475, | |
| "grad_norm": 0.5939091006500794, | |
| "learning_rate": 9.968957841170554e-05, | |
| "loss": 0.8524500131607056, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.17098133654425046, | |
| "grad_norm": 0.6504939461933902, | |
| "learning_rate": 9.967840494575879e-05, | |
| "loss": 0.887976348400116, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.17218543046357615, | |
| "grad_norm": 0.6430414793958295, | |
| "learning_rate": 9.966703457639748e-05, | |
| "loss": 0.880845308303833, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.17338952438290187, | |
| "grad_norm": 0.5968094588570781, | |
| "learning_rate": 9.965546734868875e-05, | |
| "loss": 0.7715303897857666, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.17459361830222758, | |
| "grad_norm": 0.6214316919780661, | |
| "learning_rate": 9.964370330848005e-05, | |
| "loss": 0.7617921829223633, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.17579771222155327, | |
| "grad_norm": 0.6945966190350599, | |
| "learning_rate": 9.963174250239888e-05, | |
| "loss": 0.8942645788192749, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.177001806140879, | |
| "grad_norm": 0.5946158752732297, | |
| "learning_rate": 9.96195849778526e-05, | |
| "loss": 0.8035511374473572, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.1782059000602047, | |
| "grad_norm": 0.5570594020442262, | |
| "learning_rate": 9.960723078302832e-05, | |
| "loss": 0.746635377407074, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.1794099939795304, | |
| "grad_norm": 0.5300767685887228, | |
| "learning_rate": 9.959467996689264e-05, | |
| "loss": 0.788428783416748, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.1806140878988561, | |
| "grad_norm": 0.6429771066659856, | |
| "learning_rate": 9.95819325791915e-05, | |
| "loss": 0.8748104572296143, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.18181818181818182, | |
| "grad_norm": 0.6009681075436003, | |
| "learning_rate": 9.956898867044999e-05, | |
| "loss": 0.8715510368347168, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.18302227573750754, | |
| "grad_norm": 0.6287471875141659, | |
| "learning_rate": 9.955584829197203e-05, | |
| "loss": 0.9487425088882446, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.18422636965683323, | |
| "grad_norm": 0.5832830587585199, | |
| "learning_rate": 9.954251149584036e-05, | |
| "loss": 0.7733198404312134, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.18543046357615894, | |
| "grad_norm": 0.5985330987653703, | |
| "learning_rate": 9.952897833491617e-05, | |
| "loss": 0.8523417711257935, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.18663455749548466, | |
| "grad_norm": 0.5714807380926865, | |
| "learning_rate": 9.951524886283899e-05, | |
| "loss": 0.7397559881210327, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.18783865141481035, | |
| "grad_norm": 0.6440313958900563, | |
| "learning_rate": 9.950132313402642e-05, | |
| "loss": 0.8231230974197388, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.18904274533413606, | |
| "grad_norm": 0.7067535788120153, | |
| "learning_rate": 9.948720120367394e-05, | |
| "loss": 0.8909597396850586, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.19024683925346178, | |
| "grad_norm": 0.6005944043833608, | |
| "learning_rate": 9.947288312775471e-05, | |
| "loss": 0.7262487411499023, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.19145093317278747, | |
| "grad_norm": 0.6029984307683901, | |
| "learning_rate": 9.945836896301927e-05, | |
| "loss": 0.8319439888000488, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.19265502709211318, | |
| "grad_norm": 0.6224876412877436, | |
| "learning_rate": 9.944365876699544e-05, | |
| "loss": 0.8430992364883423, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.1938591210114389, | |
| "grad_norm": 0.6160811180626786, | |
| "learning_rate": 9.942875259798796e-05, | |
| "loss": 0.7832828760147095, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.1950632149307646, | |
| "grad_norm": 0.6218195815569507, | |
| "learning_rate": 9.941365051507836e-05, | |
| "loss": 0.9227669835090637, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.1962673088500903, | |
| "grad_norm": 0.5986059436334881, | |
| "learning_rate": 9.939835257812468e-05, | |
| "loss": 0.9111921787261963, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.19747140276941602, | |
| "grad_norm": 0.5881408038735889, | |
| "learning_rate": 9.938285884776121e-05, | |
| "loss": 0.8965997695922852, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.1986754966887417, | |
| "grad_norm": 0.6374823214972137, | |
| "learning_rate": 9.936716938539834e-05, | |
| "loss": 0.8731657266616821, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.19987959060806743, | |
| "grad_norm": 0.6161832126609788, | |
| "learning_rate": 9.93512842532222e-05, | |
| "loss": 0.8580436706542969, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.20108368452739314, | |
| "grad_norm": 0.553470250749396, | |
| "learning_rate": 9.93352035141945e-05, | |
| "loss": 0.7036675810813904, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.20228777844671886, | |
| "grad_norm": 0.7770006457997596, | |
| "learning_rate": 9.931892723205221e-05, | |
| "loss": 0.8978402614593506, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.20349187236604455, | |
| "grad_norm": 0.5743393143744865, | |
| "learning_rate": 9.93024554713074e-05, | |
| "loss": 0.7655681371688843, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.20469596628537026, | |
| "grad_norm": 0.5517387700265929, | |
| "learning_rate": 9.92857882972469e-05, | |
| "loss": 0.797248899936676, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.20590006020469598, | |
| "grad_norm": 0.5547295397097244, | |
| "learning_rate": 9.926892577593208e-05, | |
| "loss": 0.8540539741516113, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.20710415412402167, | |
| "grad_norm": 0.5964203337683545, | |
| "learning_rate": 9.925186797419858e-05, | |
| "loss": 0.8034850358963013, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.20830824804334738, | |
| "grad_norm": 0.6189183804758942, | |
| "learning_rate": 9.923461495965607e-05, | |
| "loss": 0.8679941892623901, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.2095123419626731, | |
| "grad_norm": 0.5866776954815867, | |
| "learning_rate": 9.92171668006879e-05, | |
| "loss": 0.7853211164474487, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2107164358819988, | |
| "grad_norm": 0.5643948583479728, | |
| "learning_rate": 9.919952356645092e-05, | |
| "loss": 0.8060238361358643, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.2119205298013245, | |
| "grad_norm": 0.5823524489669061, | |
| "learning_rate": 9.91816853268752e-05, | |
| "loss": 0.8626150488853455, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.21312462372065022, | |
| "grad_norm": 0.5622075738754846, | |
| "learning_rate": 9.91636521526637e-05, | |
| "loss": 0.8187872767448425, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.2143287176399759, | |
| "grad_norm": 0.6733984895343411, | |
| "learning_rate": 9.9145424115292e-05, | |
| "loss": 0.8441307544708252, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.21553281155930162, | |
| "grad_norm": 0.5333453551939791, | |
| "learning_rate": 9.912700128700805e-05, | |
| "loss": 0.7993243932723999, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.21673690547862734, | |
| "grad_norm": 0.555877229699453, | |
| "learning_rate": 9.910838374083185e-05, | |
| "loss": 0.774019181728363, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.21794099939795303, | |
| "grad_norm": 0.6391151018213997, | |
| "learning_rate": 9.908957155055523e-05, | |
| "loss": 0.8688267469406128, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.21914509331727874, | |
| "grad_norm": 0.6241625884954857, | |
| "learning_rate": 9.907056479074142e-05, | |
| "loss": 0.847202718257904, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.22034918723660446, | |
| "grad_norm": 0.5890469266154632, | |
| "learning_rate": 9.905136353672492e-05, | |
| "loss": 0.7770888209342957, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.22155328115593018, | |
| "grad_norm": 0.5769999378849722, | |
| "learning_rate": 9.903196786461106e-05, | |
| "loss": 0.7439048290252686, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.22275737507525586, | |
| "grad_norm": 0.5754784178675677, | |
| "learning_rate": 9.901237785127577e-05, | |
| "loss": 0.8247470855712891, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.22396146899458158, | |
| "grad_norm": 0.5933793451997219, | |
| "learning_rate": 9.89925935743653e-05, | |
| "loss": 0.8449085354804993, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.2251655629139073, | |
| "grad_norm": 0.6188199832931084, | |
| "learning_rate": 9.897261511229583e-05, | |
| "loss": 0.9046580195426941, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.22636965683323299, | |
| "grad_norm": 0.5333245358703447, | |
| "learning_rate": 9.895244254425322e-05, | |
| "loss": 0.7648054957389832, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.2275737507525587, | |
| "grad_norm": 0.6143930481556643, | |
| "learning_rate": 9.893207595019269e-05, | |
| "loss": 0.8550212383270264, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.22877784467188442, | |
| "grad_norm": 0.5796146274953775, | |
| "learning_rate": 9.891151541083852e-05, | |
| "loss": 0.8073182702064514, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.2299819385912101, | |
| "grad_norm": 0.6327168339480935, | |
| "learning_rate": 9.889076100768362e-05, | |
| "loss": 0.8802028298377991, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.23118603251053582, | |
| "grad_norm": 0.6340397780374257, | |
| "learning_rate": 9.886981282298937e-05, | |
| "loss": 0.8678988218307495, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.23239012642986154, | |
| "grad_norm": 0.5781580859543161, | |
| "learning_rate": 9.884867093978519e-05, | |
| "loss": 0.7214905023574829, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.23359422034918723, | |
| "grad_norm": 0.5864889560079226, | |
| "learning_rate": 9.882733544186825e-05, | |
| "loss": 0.8775925636291504, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.23479831426851294, | |
| "grad_norm": 0.6195540628078187, | |
| "learning_rate": 9.880580641380306e-05, | |
| "loss": 0.7874777317047119, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.23600240818783866, | |
| "grad_norm": 0.6567543350650362, | |
| "learning_rate": 9.878408394092129e-05, | |
| "loss": 0.8925746083259583, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.23720650210716435, | |
| "grad_norm": 0.6610048982837163, | |
| "learning_rate": 9.876216810932129e-05, | |
| "loss": 0.8471571207046509, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.23841059602649006, | |
| "grad_norm": 0.6868969238880852, | |
| "learning_rate": 9.87400590058678e-05, | |
| "loss": 0.8910620808601379, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.23961468994581578, | |
| "grad_norm": 0.6370290531097156, | |
| "learning_rate": 9.871775671819162e-05, | |
| "loss": 0.8550009727478027, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.2408187838651415, | |
| "grad_norm": 0.5905145265636871, | |
| "learning_rate": 9.869526133468923e-05, | |
| "loss": 0.7858701348304749, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.24202287778446718, | |
| "grad_norm": 0.5396495024692038, | |
| "learning_rate": 9.867257294452245e-05, | |
| "loss": 0.7874826192855835, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.2432269717037929, | |
| "grad_norm": 0.5611619691570671, | |
| "learning_rate": 9.864969163761817e-05, | |
| "loss": 0.8445602059364319, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.24443106562311862, | |
| "grad_norm": 0.5927387531343714, | |
| "learning_rate": 9.86266175046678e-05, | |
| "loss": 0.7110156416893005, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.2456351595424443, | |
| "grad_norm": 0.5667132102728043, | |
| "learning_rate": 9.86033506371271e-05, | |
| "loss": 0.7977332472801208, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.24683925346177002, | |
| "grad_norm": 0.6083484012142651, | |
| "learning_rate": 9.857989112721574e-05, | |
| "loss": 0.7608157992362976, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.24804334738109574, | |
| "grad_norm": 0.5943891087363417, | |
| "learning_rate": 9.855623906791693e-05, | |
| "loss": 0.7913424968719482, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.24924744130042142, | |
| "grad_norm": 0.5548488707009894, | |
| "learning_rate": 9.853239455297705e-05, | |
| "loss": 0.851811945438385, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.25045153521974717, | |
| "grad_norm": 0.5095216387061532, | |
| "learning_rate": 9.850835767690532e-05, | |
| "loss": 0.7788999676704407, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.25165562913907286, | |
| "grad_norm": 0.6581296320927581, | |
| "learning_rate": 9.848412853497337e-05, | |
| "loss": 0.852281391620636, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.25285972305839854, | |
| "grad_norm": 0.5624078228873652, | |
| "learning_rate": 9.845970722321489e-05, | |
| "loss": 0.7634679079055786, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.2540638169777243, | |
| "grad_norm": 0.58279073197933, | |
| "learning_rate": 9.843509383842525e-05, | |
| "loss": 0.8002668619155884, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.25526791089705, | |
| "grad_norm": 0.602131782992255, | |
| "learning_rate": 9.841028847816112e-05, | |
| "loss": 0.917669951915741, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.25647200481637566, | |
| "grad_norm": 0.5824275546138442, | |
| "learning_rate": 9.838529124074006e-05, | |
| "loss": 0.6941547393798828, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.2576760987357014, | |
| "grad_norm": 0.6855988184994416, | |
| "learning_rate": 9.836010222524018e-05, | |
| "loss": 0.7900656461715698, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.2588801926550271, | |
| "grad_norm": 0.6033793243686978, | |
| "learning_rate": 9.833472153149968e-05, | |
| "loss": 0.7909616231918335, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.2600842865743528, | |
| "grad_norm": 0.5558985979936094, | |
| "learning_rate": 9.830914926011651e-05, | |
| "loss": 0.8461582660675049, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.26128838049367853, | |
| "grad_norm": 0.6135755856519033, | |
| "learning_rate": 9.828338551244794e-05, | |
| "loss": 0.8185094594955444, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.2624924744130042, | |
| "grad_norm": 0.5913147304313477, | |
| "learning_rate": 9.825743039061015e-05, | |
| "loss": 0.7694218754768372, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.2636965683323299, | |
| "grad_norm": 0.5826277813793379, | |
| "learning_rate": 9.823128399747788e-05, | |
| "loss": 0.9048447012901306, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.26490066225165565, | |
| "grad_norm": 0.6069752354101805, | |
| "learning_rate": 9.820494643668396e-05, | |
| "loss": 0.7263599038124084, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.26610475617098134, | |
| "grad_norm": 0.5779260861618932, | |
| "learning_rate": 9.817841781261894e-05, | |
| "loss": 0.7507243156433105, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.267308850090307, | |
| "grad_norm": 0.5407800371575219, | |
| "learning_rate": 9.815169823043066e-05, | |
| "loss": 0.7708674073219299, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.26851294400963277, | |
| "grad_norm": 0.5354161457972056, | |
| "learning_rate": 9.812478779602381e-05, | |
| "loss": 0.8195161819458008, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.26971703792895846, | |
| "grad_norm": 0.5435156479444293, | |
| "learning_rate": 9.809768661605956e-05, | |
| "loss": 0.8014235496520996, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.27092113184828415, | |
| "grad_norm": 0.6451511656258695, | |
| "learning_rate": 9.80703947979551e-05, | |
| "loss": 0.7672905921936035, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.2721252257676099, | |
| "grad_norm": 0.5770638130924962, | |
| "learning_rate": 9.804291244988324e-05, | |
| "loss": 0.8396711349487305, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.2733293196869356, | |
| "grad_norm": 0.6047093718584222, | |
| "learning_rate": 9.801523968077195e-05, | |
| "loss": 0.8380450010299683, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.27453341360626127, | |
| "grad_norm": 0.5925957423670997, | |
| "learning_rate": 9.798737660030397e-05, | |
| "loss": 0.8844544887542725, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.275737507525587, | |
| "grad_norm": 0.5982585104488206, | |
| "learning_rate": 9.79593233189163e-05, | |
| "loss": 0.9639543294906616, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.2769416014449127, | |
| "grad_norm": 0.5851751603385734, | |
| "learning_rate": 9.793107994779988e-05, | |
| "loss": 0.7578297853469849, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.2781456953642384, | |
| "grad_norm": 0.5662418714139069, | |
| "learning_rate": 9.790264659889903e-05, | |
| "loss": 0.7516130208969116, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.27934978928356413, | |
| "grad_norm": 0.5629010447957682, | |
| "learning_rate": 9.787402338491108e-05, | |
| "loss": 0.813753604888916, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.2805538832028898, | |
| "grad_norm": 0.611997046314177, | |
| "learning_rate": 9.78452104192859e-05, | |
| "loss": 0.7318404912948608, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.2817579771222155, | |
| "grad_norm": 0.6083078236956858, | |
| "learning_rate": 9.781620781622546e-05, | |
| "loss": 0.7795756459236145, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.28296207104154125, | |
| "grad_norm": 0.5733435457346814, | |
| "learning_rate": 9.778701569068336e-05, | |
| "loss": 0.7374966740608215, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.28416616496086694, | |
| "grad_norm": 0.5854632993757632, | |
| "learning_rate": 9.775763415836439e-05, | |
| "loss": 0.8152198791503906, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.28537025888019263, | |
| "grad_norm": 0.5754299346488221, | |
| "learning_rate": 9.772806333572405e-05, | |
| "loss": 0.8636054992675781, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.2865743527995184, | |
| "grad_norm": 0.5422881955560872, | |
| "learning_rate": 9.76983033399681e-05, | |
| "loss": 0.7844322919845581, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.28777844671884406, | |
| "grad_norm": 0.6194774799956613, | |
| "learning_rate": 9.766835428905213e-05, | |
| "loss": 0.7630101442337036, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.2889825406381698, | |
| "grad_norm": 0.603975682522893, | |
| "learning_rate": 9.763821630168106e-05, | |
| "loss": 0.7873789668083191, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2901866345574955, | |
| "grad_norm": 0.6133591002499805, | |
| "learning_rate": 9.760788949730866e-05, | |
| "loss": 0.8574675917625427, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.2913907284768212, | |
| "grad_norm": 0.5274827858620643, | |
| "learning_rate": 9.757737399613706e-05, | |
| "loss": 0.7098798751831055, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.2925948223961469, | |
| "grad_norm": 0.5891469808235313, | |
| "learning_rate": 9.754666991911633e-05, | |
| "loss": 0.8549659252166748, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.2937989163154726, | |
| "grad_norm": 0.6778037762349854, | |
| "learning_rate": 9.751577738794398e-05, | |
| "loss": 0.8683890700340271, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.2950030102347983, | |
| "grad_norm": 0.5529589488208931, | |
| "learning_rate": 9.748469652506446e-05, | |
| "loss": 0.8280667066574097, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.29620710415412405, | |
| "grad_norm": 0.6019791904555852, | |
| "learning_rate": 9.745342745366867e-05, | |
| "loss": 0.8362815380096436, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.29741119807344973, | |
| "grad_norm": 0.6103921005737301, | |
| "learning_rate": 9.74219702976935e-05, | |
| "loss": 0.7211261987686157, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.2986152919927754, | |
| "grad_norm": 0.5250093329310034, | |
| "learning_rate": 9.739032518182134e-05, | |
| "loss": 0.8164722919464111, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.29981938591210117, | |
| "grad_norm": 0.5388495396934913, | |
| "learning_rate": 9.735849223147952e-05, | |
| "loss": 0.7128286361694336, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.30102347983142685, | |
| "grad_norm": 0.5829439265945735, | |
| "learning_rate": 9.732647157283994e-05, | |
| "loss": 0.8446158766746521, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.30222757375075254, | |
| "grad_norm": 0.5295167319429783, | |
| "learning_rate": 9.729426333281841e-05, | |
| "loss": 0.7621672749519348, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.3034316676700783, | |
| "grad_norm": 0.8911053202384595, | |
| "learning_rate": 9.726186763907432e-05, | |
| "loss": 0.8294147253036499, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.304635761589404, | |
| "grad_norm": 0.5529587210931857, | |
| "learning_rate": 9.722928462000995e-05, | |
| "loss": 0.7578885555267334, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.30583985550872966, | |
| "grad_norm": 0.6002485806157559, | |
| "learning_rate": 9.71965144047701e-05, | |
| "loss": 0.7943164110183716, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.3070439494280554, | |
| "grad_norm": 0.5694226586132322, | |
| "learning_rate": 9.716355712324156e-05, | |
| "loss": 0.8210703134536743, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.3082480433473811, | |
| "grad_norm": 0.6922498599198393, | |
| "learning_rate": 9.713041290605254e-05, | |
| "loss": 0.873950719833374, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.3094521372667068, | |
| "grad_norm": 0.5445472317700759, | |
| "learning_rate": 9.709708188457218e-05, | |
| "loss": 0.8812997341156006, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.3106562311860325, | |
| "grad_norm": 0.5722532810838292, | |
| "learning_rate": 9.706356419091003e-05, | |
| "loss": 0.854279637336731, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.3118603251053582, | |
| "grad_norm": 0.6050281714134376, | |
| "learning_rate": 9.702985995791554e-05, | |
| "loss": 0.7731985449790955, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.3130644190246839, | |
| "grad_norm": 0.5259270771468094, | |
| "learning_rate": 9.69959693191775e-05, | |
| "loss": 0.7495138645172119, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.31426851294400965, | |
| "grad_norm": 0.599702214924439, | |
| "learning_rate": 9.696189240902353e-05, | |
| "loss": 0.8307251334190369, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.31547260686333534, | |
| "grad_norm": 0.5772891113584918, | |
| "learning_rate": 9.69276293625196e-05, | |
| "loss": 0.7606898546218872, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.316676700782661, | |
| "grad_norm": 0.6048200131150295, | |
| "learning_rate": 9.689318031546935e-05, | |
| "loss": 0.7834568023681641, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.31788079470198677, | |
| "grad_norm": 0.5187602167429113, | |
| "learning_rate": 9.685854540441374e-05, | |
| "loss": 0.8568704724311829, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.31908488862131246, | |
| "grad_norm": 0.589371193118731, | |
| "learning_rate": 9.682372476663037e-05, | |
| "loss": 0.8562246561050415, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.32028898254063815, | |
| "grad_norm": 0.578899694521127, | |
| "learning_rate": 9.678871854013296e-05, | |
| "loss": 0.7672446966171265, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.3214930764599639, | |
| "grad_norm": 0.6249280291756892, | |
| "learning_rate": 9.675352686367086e-05, | |
| "loss": 0.7922170162200928, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.3226971703792896, | |
| "grad_norm": 0.5749398173798772, | |
| "learning_rate": 9.671814987672842e-05, | |
| "loss": 0.8341658115386963, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.32390126429861527, | |
| "grad_norm": 0.6275533667804223, | |
| "learning_rate": 9.668258771952453e-05, | |
| "loss": 0.807477593421936, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.325105358217941, | |
| "grad_norm": 0.5821581899182365, | |
| "learning_rate": 9.664684053301199e-05, | |
| "loss": 0.7355650663375854, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3263094521372667, | |
| "grad_norm": 0.5858441049798361, | |
| "learning_rate": 9.661090845887693e-05, | |
| "loss": 0.870678186416626, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.32751354605659244, | |
| "grad_norm": 0.5365325287676738, | |
| "learning_rate": 9.657479163953839e-05, | |
| "loss": 0.8608752489089966, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.32871763997591813, | |
| "grad_norm": 0.5599944247690204, | |
| "learning_rate": 9.653849021814759e-05, | |
| "loss": 0.8878623247146606, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.3299217338952438, | |
| "grad_norm": 0.5563052433880087, | |
| "learning_rate": 9.650200433858741e-05, | |
| "loss": 0.8141953945159912, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.33112582781456956, | |
| "grad_norm": 0.5535164304716184, | |
| "learning_rate": 9.646533414547193e-05, | |
| "loss": 0.7778948545455933, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.33232992173389525, | |
| "grad_norm": 0.6014215940932262, | |
| "learning_rate": 9.642847978414569e-05, | |
| "loss": 0.7866028547286987, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.33353401565322094, | |
| "grad_norm": 0.5331079808996885, | |
| "learning_rate": 9.639144140068324e-05, | |
| "loss": 0.7266688346862793, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.3347381095725467, | |
| "grad_norm": 0.5766611369648224, | |
| "learning_rate": 9.635421914188845e-05, | |
| "loss": 0.8532525300979614, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.33594220349187237, | |
| "grad_norm": 0.5358777135832988, | |
| "learning_rate": 9.631681315529408e-05, | |
| "loss": 0.8211830854415894, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.33714629741119806, | |
| "grad_norm": 0.6298604183968022, | |
| "learning_rate": 9.627922358916102e-05, | |
| "loss": 0.8692326545715332, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3383503913305238, | |
| "grad_norm": 0.5797251655827115, | |
| "learning_rate": 9.624145059247787e-05, | |
| "loss": 0.8560823798179626, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.3395544852498495, | |
| "grad_norm": 0.5434794437969054, | |
| "learning_rate": 9.620349431496018e-05, | |
| "loss": 0.8305436968803406, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.3407585791691752, | |
| "grad_norm": 0.6441021904672944, | |
| "learning_rate": 9.616535490705004e-05, | |
| "loss": 0.7797948122024536, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.3419626730885009, | |
| "grad_norm": 0.5598221419768312, | |
| "learning_rate": 9.612703251991528e-05, | |
| "loss": 0.7669990062713623, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.3431667670078266, | |
| "grad_norm": 0.5580548895181614, | |
| "learning_rate": 9.60885273054491e-05, | |
| "loss": 0.8091415166854858, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.3443708609271523, | |
| "grad_norm": 0.6911896360916333, | |
| "learning_rate": 9.604983941626924e-05, | |
| "loss": 0.8437607288360596, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.34557495484647804, | |
| "grad_norm": 0.6177635146844122, | |
| "learning_rate": 9.601096900571757e-05, | |
| "loss": 0.891084611415863, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.34677904876580373, | |
| "grad_norm": 0.5777148183335996, | |
| "learning_rate": 9.597191622785932e-05, | |
| "loss": 0.8502274751663208, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.3479831426851294, | |
| "grad_norm": 0.5809974008489335, | |
| "learning_rate": 9.593268123748259e-05, | |
| "loss": 0.8225856423377991, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.34918723660445516, | |
| "grad_norm": 0.6051633632643184, | |
| "learning_rate": 9.589326419009768e-05, | |
| "loss": 0.7742931842803955, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.35039133052378085, | |
| "grad_norm": 0.5650358876193098, | |
| "learning_rate": 9.585366524193648e-05, | |
| "loss": 0.7171435356140137, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.35159542444310654, | |
| "grad_norm": 0.4774910718398751, | |
| "learning_rate": 9.581388454995187e-05, | |
| "loss": 0.6769593954086304, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.3527995183624323, | |
| "grad_norm": 0.6156416845975364, | |
| "learning_rate": 9.577392227181706e-05, | |
| "loss": 0.7639175653457642, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.354003612281758, | |
| "grad_norm": 0.5799348616801011, | |
| "learning_rate": 9.573377856592499e-05, | |
| "loss": 0.8483726978302002, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.35520770620108366, | |
| "grad_norm": 0.569975520032633, | |
| "learning_rate": 9.569345359138771e-05, | |
| "loss": 0.6434216499328613, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.3564118001204094, | |
| "grad_norm": 0.6057143088843095, | |
| "learning_rate": 9.565294750803575e-05, | |
| "loss": 0.7472547888755798, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.3576158940397351, | |
| "grad_norm": 0.5763656433176603, | |
| "learning_rate": 9.561226047641745e-05, | |
| "loss": 0.7766754031181335, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.3588199879590608, | |
| "grad_norm": 0.5806884679582975, | |
| "learning_rate": 9.557139265779838e-05, | |
| "loss": 0.7611971497535706, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.3600240818783865, | |
| "grad_norm": 0.653182180155357, | |
| "learning_rate": 9.553034421416062e-05, | |
| "loss": 0.8534923195838928, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.3612281757977122, | |
| "grad_norm": 0.5930589380494014, | |
| "learning_rate": 9.548911530820226e-05, | |
| "loss": 0.8566128611564636, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3624322697170379, | |
| "grad_norm": 0.594676216957681, | |
| "learning_rate": 9.544770610333655e-05, | |
| "loss": 0.7190215587615967, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 0.5423462580148574, | |
| "learning_rate": 9.540611676369145e-05, | |
| "loss": 0.7619057893753052, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.36484045755568933, | |
| "grad_norm": 0.6079659667743106, | |
| "learning_rate": 9.536434745410885e-05, | |
| "loss": 0.9198819398880005, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.3660445514750151, | |
| "grad_norm": 0.5481385740366345, | |
| "learning_rate": 9.5322398340144e-05, | |
| "loss": 0.7481440305709839, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.36724864539434077, | |
| "grad_norm": 0.5758241389293728, | |
| "learning_rate": 9.528026958806477e-05, | |
| "loss": 0.845780611038208, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.36845273931366646, | |
| "grad_norm": 0.6109260118302426, | |
| "learning_rate": 9.523796136485109e-05, | |
| "loss": 0.8439440131187439, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.3696568332329922, | |
| "grad_norm": 0.5199941673478126, | |
| "learning_rate": 9.519547383819416e-05, | |
| "loss": 0.8031255006790161, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.3708609271523179, | |
| "grad_norm": 0.5886779572113713, | |
| "learning_rate": 9.515280717649594e-05, | |
| "loss": 0.895370364189148, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.3720650210716436, | |
| "grad_norm": 0.5241621748424288, | |
| "learning_rate": 9.510996154886839e-05, | |
| "loss": 0.7253029346466064, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.3732691149909693, | |
| "grad_norm": 0.5349946958277024, | |
| "learning_rate": 9.506693712513274e-05, | |
| "loss": 0.7455810904502869, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.374473208910295, | |
| "grad_norm": 0.5413850636556015, | |
| "learning_rate": 9.502373407581897e-05, | |
| "loss": 0.8433940410614014, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.3756773028296207, | |
| "grad_norm": 0.5914855127133216, | |
| "learning_rate": 9.498035257216501e-05, | |
| "loss": 0.8574495911598206, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.37688139674894644, | |
| "grad_norm": 0.5786718132723062, | |
| "learning_rate": 9.493679278611616e-05, | |
| "loss": 0.8514014482498169, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.37808549066827213, | |
| "grad_norm": 0.677708362567278, | |
| "learning_rate": 9.489305489032425e-05, | |
| "loss": 0.5997920036315918, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.3792895845875978, | |
| "grad_norm": 0.6181220022492139, | |
| "learning_rate": 9.484913905814716e-05, | |
| "loss": 0.7272148132324219, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.38049367850692356, | |
| "grad_norm": 0.5847159564015513, | |
| "learning_rate": 9.4805045463648e-05, | |
| "loss": 0.7532312870025635, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.38169777242624925, | |
| "grad_norm": 0.5884182751349588, | |
| "learning_rate": 9.47607742815944e-05, | |
| "loss": 0.7659265995025635, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.38290186634557494, | |
| "grad_norm": 0.5719592491678291, | |
| "learning_rate": 9.471632568745794e-05, | |
| "loss": 0.7821764945983887, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.3841059602649007, | |
| "grad_norm": 0.5575576997437829, | |
| "learning_rate": 9.467169985741337e-05, | |
| "loss": 0.800609827041626, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.38531005418422637, | |
| "grad_norm": 0.5357479184752278, | |
| "learning_rate": 9.462689696833791e-05, | |
| "loss": 0.713945209980011, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.38651414810355206, | |
| "grad_norm": 0.6120104938097841, | |
| "learning_rate": 9.458191719781056e-05, | |
| "loss": 0.8058474063873291, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.3877182420228778, | |
| "grad_norm": 0.6693580400917779, | |
| "learning_rate": 9.453676072411142e-05, | |
| "loss": 0.8266781568527222, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.3889223359422035, | |
| "grad_norm": 0.5506575412307639, | |
| "learning_rate": 9.449142772622092e-05, | |
| "loss": 0.6587191224098206, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.3901264298615292, | |
| "grad_norm": 0.5511777246486614, | |
| "learning_rate": 9.444591838381922e-05, | |
| "loss": 0.7111427187919617, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.3913305237808549, | |
| "grad_norm": 0.5746660125461024, | |
| "learning_rate": 9.440023287728537e-05, | |
| "loss": 0.9033454060554504, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.3925346177001806, | |
| "grad_norm": 0.569278534242423, | |
| "learning_rate": 9.435437138769672e-05, | |
| "loss": 0.7030330300331116, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.3937387116195063, | |
| "grad_norm": 0.6060745891610826, | |
| "learning_rate": 9.430833409682806e-05, | |
| "loss": 0.8240711688995361, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.39494280553883204, | |
| "grad_norm": 0.5308427592985828, | |
| "learning_rate": 9.426212118715108e-05, | |
| "loss": 0.735508382320404, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.39614689945815773, | |
| "grad_norm": 0.5760677673333868, | |
| "learning_rate": 9.421573284183345e-05, | |
| "loss": 0.8183559775352478, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.3973509933774834, | |
| "grad_norm": 0.5071606962993296, | |
| "learning_rate": 9.416916924473825e-05, | |
| "loss": 0.7347877025604248, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.39855508729680916, | |
| "grad_norm": 0.5763889971321361, | |
| "learning_rate": 9.412243058042315e-05, | |
| "loss": 0.8280633091926575, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.39975918121613485, | |
| "grad_norm": 0.5618383356364371, | |
| "learning_rate": 9.407551703413973e-05, | |
| "loss": 0.7195807695388794, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.40096327513546054, | |
| "grad_norm": 0.5895697765110283, | |
| "learning_rate": 9.402842879183272e-05, | |
| "loss": 0.8374162316322327, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.4021673690547863, | |
| "grad_norm": 0.5547510447572006, | |
| "learning_rate": 9.398116604013925e-05, | |
| "loss": 0.7269232869148254, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.40337146297411197, | |
| "grad_norm": 0.5747636829461152, | |
| "learning_rate": 9.393372896638816e-05, | |
| "loss": 0.7741429805755615, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.4045755568934377, | |
| "grad_norm": 0.5582603271143042, | |
| "learning_rate": 9.38861177585992e-05, | |
| "loss": 0.8213120102882385, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.4057796508127634, | |
| "grad_norm": 0.5698509481662322, | |
| "learning_rate": 9.383833260548233e-05, | |
| "loss": 0.7737472057342529, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.4069837447320891, | |
| "grad_norm": 0.5260228384061386, | |
| "learning_rate": 9.379037369643694e-05, | |
| "loss": 0.8149851560592651, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.40818783865141484, | |
| "grad_norm": 0.5765318318304814, | |
| "learning_rate": 9.374224122155111e-05, | |
| "loss": 0.7775697708129883, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.4093919325707405, | |
| "grad_norm": 0.49274787307145784, | |
| "learning_rate": 9.369393537160089e-05, | |
| "loss": 0.7359933853149414, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.4105960264900662, | |
| "grad_norm": 0.5528181652912268, | |
| "learning_rate": 9.364545633804946e-05, | |
| "loss": 0.6642189025878906, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.41180012040939196, | |
| "grad_norm": 0.6207226531988652, | |
| "learning_rate": 9.359680431304647e-05, | |
| "loss": 0.8349613547325134, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.41300421432871764, | |
| "grad_norm": 0.6106078516446312, | |
| "learning_rate": 9.354797948942719e-05, | |
| "loss": 0.8384692668914795, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.41420830824804333, | |
| "grad_norm": 0.5641755633737662, | |
| "learning_rate": 9.349898206071186e-05, | |
| "loss": 0.8039138317108154, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.4154124021673691, | |
| "grad_norm": 0.656469899503437, | |
| "learning_rate": 9.344981222110476e-05, | |
| "loss": 0.8086084723472595, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.41661649608669477, | |
| "grad_norm": 0.5864557245393566, | |
| "learning_rate": 9.340047016549358e-05, | |
| "loss": 0.7342469692230225, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.41782059000602045, | |
| "grad_norm": 0.6186040249224425, | |
| "learning_rate": 9.335095608944861e-05, | |
| "loss": 0.8007559180259705, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.4190246839253462, | |
| "grad_norm": 0.6651504394109262, | |
| "learning_rate": 9.330127018922194e-05, | |
| "loss": 0.7766905426979065, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.4202287778446719, | |
| "grad_norm": 0.6545527768938719, | |
| "learning_rate": 9.325141266174666e-05, | |
| "loss": 0.7787045836448669, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.4214328717639976, | |
| "grad_norm": 0.5138104578279857, | |
| "learning_rate": 9.320138370463618e-05, | |
| "loss": 0.7259654998779297, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.4226369656833233, | |
| "grad_norm": 0.7090214107891576, | |
| "learning_rate": 9.315118351618335e-05, | |
| "loss": 0.8143008947372437, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.423841059602649, | |
| "grad_norm": 0.640107824693087, | |
| "learning_rate": 9.310081229535968e-05, | |
| "loss": 0.8052964210510254, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.4250451535219747, | |
| "grad_norm": 0.5838874533239824, | |
| "learning_rate": 9.305027024181462e-05, | |
| "loss": 0.7851589322090149, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.42624924744130044, | |
| "grad_norm": 0.6875607759241701, | |
| "learning_rate": 9.29995575558747e-05, | |
| "loss": 0.8351097106933594, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.4274533413606261, | |
| "grad_norm": 0.6132435281643827, | |
| "learning_rate": 9.294867443854278e-05, | |
| "loss": 0.7207709550857544, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.4286574352799518, | |
| "grad_norm": 0.6018423864599579, | |
| "learning_rate": 9.289762109149723e-05, | |
| "loss": 0.7128463983535767, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.42986152919927756, | |
| "grad_norm": 0.5966467960285611, | |
| "learning_rate": 9.284639771709112e-05, | |
| "loss": 0.8051083087921143, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.43106562311860325, | |
| "grad_norm": 0.5677436973852517, | |
| "learning_rate": 9.279500451835145e-05, | |
| "loss": 0.7338952422142029, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.43226971703792894, | |
| "grad_norm": 0.5633327946185779, | |
| "learning_rate": 9.274344169897834e-05, | |
| "loss": 0.7003933787345886, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.4334738109572547, | |
| "grad_norm": 0.6099766108022538, | |
| "learning_rate": 9.269170946334418e-05, | |
| "loss": 0.7850499153137207, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.43467790487658037, | |
| "grad_norm": 0.6363308217936156, | |
| "learning_rate": 9.263980801649286e-05, | |
| "loss": 0.8188478946685791, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.43588199879590606, | |
| "grad_norm": 0.5579250798677722, | |
| "learning_rate": 9.258773756413898e-05, | |
| "loss": 0.731178879737854, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.4370860927152318, | |
| "grad_norm": 0.5277326840605376, | |
| "learning_rate": 9.253549831266696e-05, | |
| "loss": 0.7181439399719238, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.4382901866345575, | |
| "grad_norm": 0.5991414923556702, | |
| "learning_rate": 9.248309046913032e-05, | |
| "loss": 0.7859128713607788, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.4394942805538832, | |
| "grad_norm": 0.5636427682419628, | |
| "learning_rate": 9.243051424125075e-05, | |
| "loss": 0.7138811349868774, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.4406983744732089, | |
| "grad_norm": 0.5846552018839858, | |
| "learning_rate": 9.237776983741735e-05, | |
| "loss": 0.8321449756622314, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.4419024683925346, | |
| "grad_norm": 0.5961509645070332, | |
| "learning_rate": 9.232485746668584e-05, | |
| "loss": 0.8703262805938721, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.44310656231186035, | |
| "grad_norm": 0.6575399410305954, | |
| "learning_rate": 9.227177733877763e-05, | |
| "loss": 0.8056511878967285, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.44431065623118604, | |
| "grad_norm": 0.6112157543514062, | |
| "learning_rate": 9.221852966407909e-05, | |
| "loss": 0.81901615858078, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.44551475015051173, | |
| "grad_norm": 0.5780705238766984, | |
| "learning_rate": 9.216511465364066e-05, | |
| "loss": 0.7020336389541626, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.4467188440698375, | |
| "grad_norm": 0.5513629981168275, | |
| "learning_rate": 9.2111532519176e-05, | |
| "loss": 0.7623274326324463, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.44792293798916316, | |
| "grad_norm": 0.5590339331584651, | |
| "learning_rate": 9.205778347306121e-05, | |
| "loss": 0.7354063987731934, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.44912703190848885, | |
| "grad_norm": 0.5498974325177253, | |
| "learning_rate": 9.200386772833394e-05, | |
| "loss": 0.6966525316238403, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.4503311258278146, | |
| "grad_norm": 0.5404885313697676, | |
| "learning_rate": 9.194978549869256e-05, | |
| "loss": 0.9187078475952148, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.4515352197471403, | |
| "grad_norm": 0.574364257307318, | |
| "learning_rate": 9.189553699849531e-05, | |
| "loss": 0.7900419235229492, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.45273931366646597, | |
| "grad_norm": 0.5876347961897604, | |
| "learning_rate": 9.184112244275948e-05, | |
| "loss": 0.8052917122840881, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.4539434075857917, | |
| "grad_norm": 0.509622094995267, | |
| "learning_rate": 9.178654204716048e-05, | |
| "loss": 0.7819595336914062, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.4551475015051174, | |
| "grad_norm": 0.5616258880534972, | |
| "learning_rate": 9.173179602803108e-05, | |
| "loss": 0.8665533065795898, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.4563515954244431, | |
| "grad_norm": 0.5674735960986814, | |
| "learning_rate": 9.167688460236049e-05, | |
| "loss": 0.7420907020568848, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.45755568934376883, | |
| "grad_norm": 0.5651074504659922, | |
| "learning_rate": 9.162180798779354e-05, | |
| "loss": 0.6834886074066162, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.4587597832630945, | |
| "grad_norm": 0.5208113984513062, | |
| "learning_rate": 9.156656640262975e-05, | |
| "loss": 0.7298674583435059, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.4599638771824202, | |
| "grad_norm": 0.5568478288456997, | |
| "learning_rate": 9.151116006582259e-05, | |
| "loss": 0.7340203523635864, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.46116797110174595, | |
| "grad_norm": 0.6609119540723819, | |
| "learning_rate": 9.145558919697844e-05, | |
| "loss": 0.8512008190155029, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.46237206502107164, | |
| "grad_norm": 0.5667512563774485, | |
| "learning_rate": 9.139985401635587e-05, | |
| "loss": 0.7891113758087158, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.46357615894039733, | |
| "grad_norm": 0.5542758268779019, | |
| "learning_rate": 9.13439547448647e-05, | |
| "loss": 0.7792179584503174, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.4647802528597231, | |
| "grad_norm": 0.5996658004222291, | |
| "learning_rate": 9.128789160406512e-05, | |
| "loss": 0.9395783543586731, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.46598434677904876, | |
| "grad_norm": 0.5073928978968797, | |
| "learning_rate": 9.123166481616687e-05, | |
| "loss": 0.6415067911148071, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.46718844069837445, | |
| "grad_norm": 0.631792579802824, | |
| "learning_rate": 9.117527460402826e-05, | |
| "loss": 0.8739094734191895, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.4683925346177002, | |
| "grad_norm": 0.5554177822601125, | |
| "learning_rate": 9.111872119115537e-05, | |
| "loss": 0.7036592960357666, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.4695966285370259, | |
| "grad_norm": 0.5645666409480741, | |
| "learning_rate": 9.106200480170113e-05, | |
| "loss": 0.7435568571090698, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.4708007224563516, | |
| "grad_norm": 0.5486147438942627, | |
| "learning_rate": 9.100512566046444e-05, | |
| "loss": 0.6777222752571106, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.4720048163756773, | |
| "grad_norm": 0.5789769653823079, | |
| "learning_rate": 9.094808399288927e-05, | |
| "loss": 0.7870496511459351, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.473208910295003, | |
| "grad_norm": 0.6306138629757833, | |
| "learning_rate": 9.089088002506379e-05, | |
| "loss": 0.9221470355987549, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.4744130042143287, | |
| "grad_norm": 0.63364802954993, | |
| "learning_rate": 9.083351398371944e-05, | |
| "loss": 0.7603942155838013, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.47561709813365444, | |
| "grad_norm": 0.7115530757590298, | |
| "learning_rate": 9.077598609623006e-05, | |
| "loss": 0.7661646604537964, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.4768211920529801, | |
| "grad_norm": 0.5186352616369874, | |
| "learning_rate": 9.071829659061097e-05, | |
| "loss": 0.7703070640563965, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.4780252859723058, | |
| "grad_norm": 0.5523785951320875, | |
| "learning_rate": 9.066044569551807e-05, | |
| "loss": 0.6563143730163574, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.47922937989163156, | |
| "grad_norm": 0.6412244640505108, | |
| "learning_rate": 9.060243364024692e-05, | |
| "loss": 0.8129726052284241, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.48043347381095725, | |
| "grad_norm": 0.5840224219823051, | |
| "learning_rate": 9.054426065473192e-05, | |
| "loss": 0.7899963855743408, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.481637567730283, | |
| "grad_norm": 0.615345356179797, | |
| "learning_rate": 9.048592696954524e-05, | |
| "loss": 0.7919591665267944, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4828416616496087, | |
| "grad_norm": 0.5594464109878114, | |
| "learning_rate": 9.042743281589605e-05, | |
| "loss": 0.7602540254592896, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.48404575556893437, | |
| "grad_norm": 0.523704912811271, | |
| "learning_rate": 9.036877842562952e-05, | |
| "loss": 0.8046675324440002, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.4852498494882601, | |
| "grad_norm": 0.564482954113039, | |
| "learning_rate": 9.030996403122592e-05, | |
| "loss": 0.8811895847320557, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.4864539434075858, | |
| "grad_norm": 0.6386258144233664, | |
| "learning_rate": 9.025098986579975e-05, | |
| "loss": 0.7436861991882324, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.4876580373269115, | |
| "grad_norm": 0.5765254802073968, | |
| "learning_rate": 9.019185616309871e-05, | |
| "loss": 0.6859670877456665, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.48886213124623723, | |
| "grad_norm": 0.5730166849121991, | |
| "learning_rate": 9.013256315750291e-05, | |
| "loss": 0.7614256143569946, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.4900662251655629, | |
| "grad_norm": 0.5357875716257245, | |
| "learning_rate": 9.007311108402379e-05, | |
| "loss": 0.7794307470321655, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.4912703190848886, | |
| "grad_norm": 0.5081696602355975, | |
| "learning_rate": 9.001350017830331e-05, | |
| "loss": 0.8063097596168518, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.49247441300421435, | |
| "grad_norm": 0.5628512810128503, | |
| "learning_rate": 8.995373067661296e-05, | |
| "loss": 0.8180215358734131, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.49367850692354004, | |
| "grad_norm": 0.5787543459084703, | |
| "learning_rate": 8.989380281585287e-05, | |
| "loss": 0.7372187376022339, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.4948826008428657, | |
| "grad_norm": 0.5667198954493534, | |
| "learning_rate": 8.983371683355075e-05, | |
| "loss": 0.7635501623153687, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.49608669476219147, | |
| "grad_norm": 0.5600547455387459, | |
| "learning_rate": 8.977347296786113e-05, | |
| "loss": 0.7958824634552002, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.49729078868151716, | |
| "grad_norm": 0.5625737085034314, | |
| "learning_rate": 8.971307145756426e-05, | |
| "loss": 0.841810405254364, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.49849488260084285, | |
| "grad_norm": 0.5614410910946721, | |
| "learning_rate": 8.965251254206524e-05, | |
| "loss": 0.7582192420959473, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.4996989765201686, | |
| "grad_norm": 0.5645502655557872, | |
| "learning_rate": 8.959179646139306e-05, | |
| "loss": 0.7968418598175049, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.5009030704394943, | |
| "grad_norm": 0.5917884667130379, | |
| "learning_rate": 8.953092345619964e-05, | |
| "loss": 0.7377205491065979, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.50210716435882, | |
| "grad_norm": 0.5243909360294917, | |
| "learning_rate": 8.94698937677589e-05, | |
| "loss": 0.7942450642585754, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.5033112582781457, | |
| "grad_norm": 0.5980300395240639, | |
| "learning_rate": 8.940870763796574e-05, | |
| "loss": 0.8235912322998047, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.5045153521974715, | |
| "grad_norm": 0.5811078832563148, | |
| "learning_rate": 8.934736530933519e-05, | |
| "loss": 0.8105367422103882, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.5057194461167971, | |
| "grad_norm": 0.4920507260541056, | |
| "learning_rate": 8.928586702500128e-05, | |
| "loss": 0.7134383916854858, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5069235400361228, | |
| "grad_norm": 0.5367894318907388, | |
| "learning_rate": 8.922421302871629e-05, | |
| "loss": 0.6764764785766602, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.5081276339554486, | |
| "grad_norm": 0.5426721151064777, | |
| "learning_rate": 8.916240356484961e-05, | |
| "loss": 0.8036496639251709, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.5093317278747742, | |
| "grad_norm": 0.5283738533968033, | |
| "learning_rate": 8.910043887838686e-05, | |
| "loss": 0.7051277160644531, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.5105358217941, | |
| "grad_norm": 0.5568815068292188, | |
| "learning_rate": 8.903831921492889e-05, | |
| "loss": 0.7518850564956665, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.5117399157134257, | |
| "grad_norm": 0.6019868311120965, | |
| "learning_rate": 8.89760448206908e-05, | |
| "loss": 0.8285863399505615, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.5129440096327513, | |
| "grad_norm": 0.5322512751282783, | |
| "learning_rate": 8.8913615942501e-05, | |
| "loss": 0.8142893314361572, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.5141481035520771, | |
| "grad_norm": 0.5773593281276044, | |
| "learning_rate": 8.885103282780016e-05, | |
| "loss": 0.7128541469573975, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.5153521974714028, | |
| "grad_norm": 0.5265057895614733, | |
| "learning_rate": 8.878829572464034e-05, | |
| "loss": 0.7287251949310303, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.5165562913907285, | |
| "grad_norm": 0.6126298896774156, | |
| "learning_rate": 8.872540488168389e-05, | |
| "loss": 0.7118552923202515, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.5177603853100542, | |
| "grad_norm": 0.5472240818223483, | |
| "learning_rate": 8.866236054820251e-05, | |
| "loss": 0.7330264449119568, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.5189644792293799, | |
| "grad_norm": 0.5423773205241336, | |
| "learning_rate": 8.859916297407636e-05, | |
| "loss": 0.8498375415802002, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.5201685731487056, | |
| "grad_norm": 0.511978093072427, | |
| "learning_rate": 8.853581240979285e-05, | |
| "loss": 0.7740522623062134, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.5213726670680313, | |
| "grad_norm": 0.6328693375860855, | |
| "learning_rate": 8.847230910644586e-05, | |
| "loss": 0.8013511896133423, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.5225767609873571, | |
| "grad_norm": 0.5191934031368447, | |
| "learning_rate": 8.840865331573465e-05, | |
| "loss": 0.8275898098945618, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.5237808549066827, | |
| "grad_norm": 0.6078817477329859, | |
| "learning_rate": 8.834484528996287e-05, | |
| "loss": 0.7879576086997986, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.5249849488260084, | |
| "grad_norm": 0.5932516211316311, | |
| "learning_rate": 8.828088528203753e-05, | |
| "loss": 0.7822805643081665, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.5261890427453342, | |
| "grad_norm": 0.5236270498419456, | |
| "learning_rate": 8.821677354546807e-05, | |
| "loss": 0.8038450479507446, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.5273931366646598, | |
| "grad_norm": 0.549932395037056, | |
| "learning_rate": 8.815251033436531e-05, | |
| "loss": 0.734670877456665, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.5285972305839856, | |
| "grad_norm": 0.5443522562767869, | |
| "learning_rate": 8.808809590344042e-05, | |
| "loss": 0.7292616367340088, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.5298013245033113, | |
| "grad_norm": 0.614247181494645, | |
| "learning_rate": 8.802353050800398e-05, | |
| "loss": 0.8566461801528931, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5310054184226369, | |
| "grad_norm": 0.6195557998692361, | |
| "learning_rate": 8.795881440396491e-05, | |
| "loss": 0.8758805990219116, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.5322095123419627, | |
| "grad_norm": 0.6027928486912933, | |
| "learning_rate": 8.789394784782945e-05, | |
| "loss": 0.8482891917228699, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.5334136062612884, | |
| "grad_norm": 0.5778637021073555, | |
| "learning_rate": 8.78289310967002e-05, | |
| "loss": 0.8151867389678955, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.534617700180614, | |
| "grad_norm": 0.5831694881381801, | |
| "learning_rate": 8.776376440827505e-05, | |
| "loss": 0.7579381465911865, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5358217940999398, | |
| "grad_norm": 0.5875591850025278, | |
| "learning_rate": 8.769844804084619e-05, | |
| "loss": 0.7059892416000366, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.5370258880192655, | |
| "grad_norm": 0.5184140330764471, | |
| "learning_rate": 8.763298225329903e-05, | |
| "loss": 0.7210403680801392, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.5382299819385912, | |
| "grad_norm": 0.5420847104763425, | |
| "learning_rate": 8.756736730511128e-05, | |
| "loss": 0.7385925650596619, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.5394340758579169, | |
| "grad_norm": 0.5935346067614179, | |
| "learning_rate": 8.750160345635183e-05, | |
| "loss": 0.8868709802627563, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.5406381697772427, | |
| "grad_norm": 0.5537934394668839, | |
| "learning_rate": 8.74356909676797e-05, | |
| "loss": 0.779586911201477, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.5418422636965683, | |
| "grad_norm": 0.5601615221122076, | |
| "learning_rate": 8.736963010034311e-05, | |
| "loss": 0.7346528768539429, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.543046357615894, | |
| "grad_norm": 0.5745725564232856, | |
| "learning_rate": 8.73034211161784e-05, | |
| "loss": 0.7848095893859863, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.5442504515352198, | |
| "grad_norm": 0.5335933434495655, | |
| "learning_rate": 8.723706427760892e-05, | |
| "loss": 0.7612522840499878, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.5454545454545454, | |
| "grad_norm": 0.5227005627709939, | |
| "learning_rate": 8.717055984764411e-05, | |
| "loss": 0.6311644911766052, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.5466586393738712, | |
| "grad_norm": 0.569956653853632, | |
| "learning_rate": 8.710390808987833e-05, | |
| "loss": 0.7876021862030029, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5478627332931969, | |
| "grad_norm": 0.5696433799827839, | |
| "learning_rate": 8.703710926848995e-05, | |
| "loss": 0.7248250246047974, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.5490668272125225, | |
| "grad_norm": 0.6084782666777745, | |
| "learning_rate": 8.697016364824023e-05, | |
| "loss": 0.816514253616333, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.5502709211318483, | |
| "grad_norm": 0.5436677824446347, | |
| "learning_rate": 8.690307149447221e-05, | |
| "loss": 0.7234601974487305, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.551475015051174, | |
| "grad_norm": 0.5270803980787487, | |
| "learning_rate": 8.683583307310978e-05, | |
| "loss": 0.6568968892097473, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5526791089704997, | |
| "grad_norm": 0.5707483764552527, | |
| "learning_rate": 8.676844865065659e-05, | |
| "loss": 0.777597188949585, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.5538832028898254, | |
| "grad_norm": 0.640463873721775, | |
| "learning_rate": 8.67009184941949e-05, | |
| "loss": 0.9555122256278992, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5550872968091511, | |
| "grad_norm": 0.5567654676409305, | |
| "learning_rate": 8.663324287138469e-05, | |
| "loss": 0.8270890712738037, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.5562913907284768, | |
| "grad_norm": 0.5072057355413937, | |
| "learning_rate": 8.656542205046243e-05, | |
| "loss": 0.6548333764076233, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5574954846478025, | |
| "grad_norm": 0.5192512231836927, | |
| "learning_rate": 8.64974563002401e-05, | |
| "loss": 0.7468447089195251, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.5586995785671283, | |
| "grad_norm": 0.6679093927996793, | |
| "learning_rate": 8.642934589010414e-05, | |
| "loss": 0.8109292387962341, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.5599036724864539, | |
| "grad_norm": 0.566273727377757, | |
| "learning_rate": 8.636109109001438e-05, | |
| "loss": 0.7278716564178467, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.5611077664057796, | |
| "grad_norm": 0.5609594487670674, | |
| "learning_rate": 8.629269217050289e-05, | |
| "loss": 0.8199411630630493, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5623118603251054, | |
| "grad_norm": 0.5367001348544211, | |
| "learning_rate": 8.6224149402673e-05, | |
| "loss": 0.7463403344154358, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.563515954244431, | |
| "grad_norm": 0.546359613860392, | |
| "learning_rate": 8.61554630581982e-05, | |
| "loss": 0.7412172555923462, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.5647200481637568, | |
| "grad_norm": 0.5641710231968654, | |
| "learning_rate": 8.608663340932104e-05, | |
| "loss": 0.758033275604248, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.5659241420830825, | |
| "grad_norm": 0.5512535825337246, | |
| "learning_rate": 8.601766072885204e-05, | |
| "loss": 0.8045417070388794, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5671282360024081, | |
| "grad_norm": 0.5665039364926218, | |
| "learning_rate": 8.594854529016872e-05, | |
| "loss": 0.7329437732696533, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.5683323299217339, | |
| "grad_norm": 0.5888725079503595, | |
| "learning_rate": 8.587928736721432e-05, | |
| "loss": 0.669249951839447, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.5695364238410596, | |
| "grad_norm": 0.6183004106038696, | |
| "learning_rate": 8.580988723449688e-05, | |
| "loss": 0.8152425289154053, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.5707405177603853, | |
| "grad_norm": 0.5391269856377908, | |
| "learning_rate": 8.574034516708814e-05, | |
| "loss": 0.7069454789161682, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.571944611679711, | |
| "grad_norm": 0.543803781167708, | |
| "learning_rate": 8.567066144062232e-05, | |
| "loss": 0.819575309753418, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.5731487055990367, | |
| "grad_norm": 0.5310954607891327, | |
| "learning_rate": 8.560083633129519e-05, | |
| "loss": 0.7727447748184204, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5743527995183624, | |
| "grad_norm": 0.5336411225614897, | |
| "learning_rate": 8.553087011586284e-05, | |
| "loss": 0.7410896420478821, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.5755568934376881, | |
| "grad_norm": 0.5160334921971708, | |
| "learning_rate": 8.546076307164068e-05, | |
| "loss": 0.6553800106048584, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.5767609873570139, | |
| "grad_norm": 0.5302079661582317, | |
| "learning_rate": 8.53905154765023e-05, | |
| "loss": 0.7029821872711182, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.5779650812763396, | |
| "grad_norm": 0.5600636568683036, | |
| "learning_rate": 8.532012760887837e-05, | |
| "loss": 0.7383387088775635, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5791691751956652, | |
| "grad_norm": 0.5998929893076258, | |
| "learning_rate": 8.524959974775551e-05, | |
| "loss": 0.6950462460517883, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.580373269114991, | |
| "grad_norm": 0.5365329118532681, | |
| "learning_rate": 8.517893217267525e-05, | |
| "loss": 0.778092622756958, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.5815773630343167, | |
| "grad_norm": 0.5368638703097667, | |
| "learning_rate": 8.510812516373288e-05, | |
| "loss": 0.7105111479759216, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.5827814569536424, | |
| "grad_norm": 0.5907429035845754, | |
| "learning_rate": 8.503717900157632e-05, | |
| "loss": 0.7242652177810669, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.5839855508729681, | |
| "grad_norm": 0.5536338375378821, | |
| "learning_rate": 8.496609396740506e-05, | |
| "loss": 0.7318590879440308, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.5851896447922939, | |
| "grad_norm": 0.5372423258283444, | |
| "learning_rate": 8.489487034296902e-05, | |
| "loss": 0.693002462387085, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5863937387116195, | |
| "grad_norm": 0.5789242229583803, | |
| "learning_rate": 8.482350841056737e-05, | |
| "loss": 0.8206942081451416, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.5875978326309452, | |
| "grad_norm": 0.5233343970495764, | |
| "learning_rate": 8.475200845304758e-05, | |
| "loss": 0.7247848510742188, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.588801926550271, | |
| "grad_norm": 0.5660226782273997, | |
| "learning_rate": 8.468037075380408e-05, | |
| "loss": 0.7088282108306885, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.5900060204695966, | |
| "grad_norm": 0.6290332733728029, | |
| "learning_rate": 8.460859559677734e-05, | |
| "loss": 0.7779991626739502, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5912101143889223, | |
| "grad_norm": 0.5914880251961212, | |
| "learning_rate": 8.453668326645259e-05, | |
| "loss": 0.6892540454864502, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.5924142083082481, | |
| "grad_norm": 0.5921474437264784, | |
| "learning_rate": 8.446463404785875e-05, | |
| "loss": 0.7645236253738403, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.5936183022275737, | |
| "grad_norm": 0.527891720939569, | |
| "learning_rate": 8.43924482265674e-05, | |
| "loss": 0.699411153793335, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.5948223961468995, | |
| "grad_norm": 0.5808472585843106, | |
| "learning_rate": 8.432012608869141e-05, | |
| "loss": 0.6047768592834473, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.5960264900662252, | |
| "grad_norm": 0.7262338174416691, | |
| "learning_rate": 8.424766792088408e-05, | |
| "loss": 0.7811301946640015, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.5972305839855508, | |
| "grad_norm": 0.6179024400861065, | |
| "learning_rate": 8.417507401033779e-05, | |
| "loss": 0.7106954455375671, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.5984346779048766, | |
| "grad_norm": 0.5815438691669637, | |
| "learning_rate": 8.410234464478297e-05, | |
| "loss": 0.7327958345413208, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.5996387718242023, | |
| "grad_norm": 0.5730949644157526, | |
| "learning_rate": 8.402948011248692e-05, | |
| "loss": 0.7256393432617188, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.600842865743528, | |
| "grad_norm": 0.521591136634977, | |
| "learning_rate": 8.395648070225272e-05, | |
| "loss": 0.7140817046165466, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.6020469596628537, | |
| "grad_norm": 0.5589447096353005, | |
| "learning_rate": 8.388334670341805e-05, | |
| "loss": 0.8448606729507446, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6032510535821795, | |
| "grad_norm": 0.6149413795804055, | |
| "learning_rate": 8.381007840585395e-05, | |
| "loss": 0.8227097988128662, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.6044551475015051, | |
| "grad_norm": 0.5875235914864911, | |
| "learning_rate": 8.373667609996387e-05, | |
| "loss": 0.7119932174682617, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.6056592414208308, | |
| "grad_norm": 0.577257899771382, | |
| "learning_rate": 8.366314007668235e-05, | |
| "loss": 0.8036626577377319, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.6068633353401566, | |
| "grad_norm": 0.6029845794394658, | |
| "learning_rate": 8.358947062747397e-05, | |
| "loss": 0.7943709492683411, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.6080674292594822, | |
| "grad_norm": 0.5679689315213442, | |
| "learning_rate": 8.351566804433207e-05, | |
| "loss": 0.7255787253379822, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.609271523178808, | |
| "grad_norm": 0.5861915732495735, | |
| "learning_rate": 8.344173261977777e-05, | |
| "loss": 0.7799243927001953, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.6104756170981337, | |
| "grad_norm": 0.5291794657388438, | |
| "learning_rate": 8.336766464685869e-05, | |
| "loss": 0.6609664559364319, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.6116797110174593, | |
| "grad_norm": 0.5419697178161531, | |
| "learning_rate": 8.329346441914774e-05, | |
| "loss": 0.7047343850135803, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.6128838049367851, | |
| "grad_norm": 0.5805226842218582, | |
| "learning_rate": 8.321913223074212e-05, | |
| "loss": 0.8172140121459961, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.6140878988561108, | |
| "grad_norm": 0.5298500503507028, | |
| "learning_rate": 8.314466837626205e-05, | |
| "loss": 0.7412742376327515, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.6152919927754364, | |
| "grad_norm": 0.5840010321657718, | |
| "learning_rate": 8.307007315084958e-05, | |
| "loss": 0.8661916851997375, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.6164960866947622, | |
| "grad_norm": 0.5728057698967226, | |
| "learning_rate": 8.299534685016747e-05, | |
| "loss": 0.8871906399726868, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.6177001806140879, | |
| "grad_norm": 0.5253990819928123, | |
| "learning_rate": 8.292048977039801e-05, | |
| "loss": 0.7414618730545044, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.6189042745334136, | |
| "grad_norm": 0.5776425296105538, | |
| "learning_rate": 8.284550220824187e-05, | |
| "loss": 0.8262412548065186, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.6201083684527393, | |
| "grad_norm": 0.5631058348593229, | |
| "learning_rate": 8.277038446091683e-05, | |
| "loss": 0.7710633277893066, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.621312462372065, | |
| "grad_norm": 0.575180578671768, | |
| "learning_rate": 8.269513682615672e-05, | |
| "loss": 0.7422937750816345, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.6225165562913907, | |
| "grad_norm": 0.5628819507888372, | |
| "learning_rate": 8.261975960221017e-05, | |
| "loss": 0.7483841776847839, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.6237206502107164, | |
| "grad_norm": 0.5492417710470213, | |
| "learning_rate": 8.254425308783944e-05, | |
| "loss": 0.735903799533844, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.6249247441300422, | |
| "grad_norm": 0.6072331817997367, | |
| "learning_rate": 8.246861758231925e-05, | |
| "loss": 0.8859599828720093, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.6261288380493678, | |
| "grad_norm": 0.5334883775025814, | |
| "learning_rate": 8.239285338543558e-05, | |
| "loss": 0.8330262303352356, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.6273329319686936, | |
| "grad_norm": 0.5255759564099236, | |
| "learning_rate": 8.23169607974845e-05, | |
| "loss": 0.7606721520423889, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.6285370258880193, | |
| "grad_norm": 0.5114246112951885, | |
| "learning_rate": 8.224094011927091e-05, | |
| "loss": 0.6729496717453003, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.6297411198073449, | |
| "grad_norm": 0.6173305080246639, | |
| "learning_rate": 8.216479165210748e-05, | |
| "loss": 0.7409894466400146, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.6309452137266707, | |
| "grad_norm": 0.5617836428921698, | |
| "learning_rate": 8.208851569781335e-05, | |
| "loss": 0.7701466083526611, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.6321493076459964, | |
| "grad_norm": 0.556774285143137, | |
| "learning_rate": 8.201211255871293e-05, | |
| "loss": 0.7755535840988159, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.633353401565322, | |
| "grad_norm": 0.5473181136107016, | |
| "learning_rate": 8.193558253763478e-05, | |
| "loss": 0.771713137626648, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.6345574954846478, | |
| "grad_norm": 0.5892189910907214, | |
| "learning_rate": 8.185892593791034e-05, | |
| "loss": 0.8112677335739136, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.6357615894039735, | |
| "grad_norm": 0.49813360212693836, | |
| "learning_rate": 8.178214306337278e-05, | |
| "loss": 0.6962313652038574, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.6369656833232992, | |
| "grad_norm": 0.5163153013576592, | |
| "learning_rate": 8.170523421835572e-05, | |
| "loss": 0.6276844143867493, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.6381697772426249, | |
| "grad_norm": 0.5764131088706974, | |
| "learning_rate": 8.162819970769211e-05, | |
| "loss": 0.8286241888999939, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.6393738711619507, | |
| "grad_norm": 0.5345116258038959, | |
| "learning_rate": 8.155103983671297e-05, | |
| "loss": 0.6163603067398071, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.6405779650812763, | |
| "grad_norm": 0.5693561222753533, | |
| "learning_rate": 8.14737549112462e-05, | |
| "loss": 0.7609665989875793, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.641782059000602, | |
| "grad_norm": 0.5363204766451696, | |
| "learning_rate": 8.139634523761537e-05, | |
| "loss": 0.7703006863594055, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.6429861529199278, | |
| "grad_norm": 0.5018955837394068, | |
| "learning_rate": 8.131881112263845e-05, | |
| "loss": 0.7416508793830872, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.6441902468392534, | |
| "grad_norm": 0.5609638717660475, | |
| "learning_rate": 8.12411528736267e-05, | |
| "loss": 0.8192243576049805, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.6453943407585792, | |
| "grad_norm": 0.6780705920762121, | |
| "learning_rate": 8.116337079838337e-05, | |
| "loss": 0.7729198932647705, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.6465984346779049, | |
| "grad_norm": 0.5440984121290051, | |
| "learning_rate": 8.10854652052025e-05, | |
| "loss": 0.6821568012237549, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.6478025285972305, | |
| "grad_norm": 0.5717350810710682, | |
| "learning_rate": 8.100743640286768e-05, | |
| "loss": 0.7128407955169678, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.6490066225165563, | |
| "grad_norm": 0.5284572857325969, | |
| "learning_rate": 8.092928470065091e-05, | |
| "loss": 0.7074618339538574, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.650210716435882, | |
| "grad_norm": 0.5074640985182344, | |
| "learning_rate": 8.085101040831122e-05, | |
| "loss": 0.646845817565918, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.6514148103552077, | |
| "grad_norm": 0.5543246780629942, | |
| "learning_rate": 8.077261383609363e-05, | |
| "loss": 0.773701548576355, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.6526189042745334, | |
| "grad_norm": 0.6160660268489386, | |
| "learning_rate": 8.069409529472774e-05, | |
| "loss": 0.6822642683982849, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.6538229981938591, | |
| "grad_norm": 0.53725601622277, | |
| "learning_rate": 8.061545509542663e-05, | |
| "loss": 0.706203818321228, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.6550270921131849, | |
| "grad_norm": 0.5298470910298363, | |
| "learning_rate": 8.05366935498856e-05, | |
| "loss": 0.7028369903564453, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.6562311860325105, | |
| "grad_norm": 0.5289295675125671, | |
| "learning_rate": 8.045781097028083e-05, | |
| "loss": 0.8363953232765198, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.6574352799518363, | |
| "grad_norm": 0.5484514846314679, | |
| "learning_rate": 8.037880766926833e-05, | |
| "loss": 0.8546597361564636, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.658639373871162, | |
| "grad_norm": 0.5743868068932564, | |
| "learning_rate": 8.02996839599825e-05, | |
| "loss": 0.7040009498596191, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.6598434677904876, | |
| "grad_norm": 0.5717016239116167, | |
| "learning_rate": 8.022044015603505e-05, | |
| "loss": 0.78245609998703, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.6610475617098134, | |
| "grad_norm": 0.5863267636460782, | |
| "learning_rate": 8.014107657151369e-05, | |
| "loss": 0.8965244293212891, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.6622516556291391, | |
| "grad_norm": 0.5058811889612245, | |
| "learning_rate": 8.006159352098082e-05, | |
| "loss": 0.7318023443222046, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.6634557495484648, | |
| "grad_norm": 0.5481044361172038, | |
| "learning_rate": 7.998199131947247e-05, | |
| "loss": 0.8633524775505066, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.6646598434677905, | |
| "grad_norm": 0.5693440196646484, | |
| "learning_rate": 7.990227028249678e-05, | |
| "loss": 0.73511803150177, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.6658639373871162, | |
| "grad_norm": 0.5738698013859286, | |
| "learning_rate": 7.982243072603306e-05, | |
| "loss": 0.7095030546188354, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.6670680313064419, | |
| "grad_norm": 0.5798536139502265, | |
| "learning_rate": 7.974247296653028e-05, | |
| "loss": 0.6944142580032349, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.6682721252257676, | |
| "grad_norm": 0.5445345284588756, | |
| "learning_rate": 7.966239732090592e-05, | |
| "loss": 0.7495391368865967, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.6694762191450934, | |
| "grad_norm": 0.5038971804023679, | |
| "learning_rate": 7.958220410654475e-05, | |
| "loss": 0.6984772086143494, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.670680313064419, | |
| "grad_norm": 0.5600022210005443, | |
| "learning_rate": 7.95018936412975e-05, | |
| "loss": 0.7748847007751465, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.6718844069837447, | |
| "grad_norm": 0.5147765169482994, | |
| "learning_rate": 7.942146624347964e-05, | |
| "loss": 0.6932211518287659, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6730885009030705, | |
| "grad_norm": 0.5425861579492259, | |
| "learning_rate": 7.93409222318701e-05, | |
| "loss": 0.7832596302032471, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.6742925948223961, | |
| "grad_norm": 0.5209359479363717, | |
| "learning_rate": 7.926026192571007e-05, | |
| "loss": 0.7210221290588379, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6754966887417219, | |
| "grad_norm": 0.5337914370112163, | |
| "learning_rate": 7.917948564470157e-05, | |
| "loss": 0.78125, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.6767007826610476, | |
| "grad_norm": 0.5630510669001023, | |
| "learning_rate": 7.909859370900642e-05, | |
| "loss": 0.656562089920044, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.6779048765803732, | |
| "grad_norm": 0.6145722959937302, | |
| "learning_rate": 7.901758643924475e-05, | |
| "loss": 0.7918832302093506, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.679108970499699, | |
| "grad_norm": 0.5595323369440784, | |
| "learning_rate": 7.893646415649384e-05, | |
| "loss": 0.6872172355651855, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.6803130644190247, | |
| "grad_norm": 0.559585873258208, | |
| "learning_rate": 7.88552271822869e-05, | |
| "loss": 0.7413808107376099, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.6815171583383504, | |
| "grad_norm": 0.5896344593158469, | |
| "learning_rate": 7.877387583861165e-05, | |
| "loss": 0.7586731910705566, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.6827212522576761, | |
| "grad_norm": 0.7785143302301188, | |
| "learning_rate": 7.869241044790915e-05, | |
| "loss": 0.8225888609886169, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.6839253461770018, | |
| "grad_norm": 0.5216852463423214, | |
| "learning_rate": 7.861083133307247e-05, | |
| "loss": 0.7068048715591431, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6851294400963275, | |
| "grad_norm": 0.546179315042222, | |
| "learning_rate": 7.852913881744547e-05, | |
| "loss": 0.7423121333122253, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.6863335340156532, | |
| "grad_norm": 0.603434739035308, | |
| "learning_rate": 7.844733322482145e-05, | |
| "loss": 0.7143921256065369, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.687537627934979, | |
| "grad_norm": 0.5559389093356377, | |
| "learning_rate": 7.836541487944193e-05, | |
| "loss": 0.7010910511016846, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.6887417218543046, | |
| "grad_norm": 0.5487579113202334, | |
| "learning_rate": 7.82833841059953e-05, | |
| "loss": 0.7683477401733398, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.6899458157736303, | |
| "grad_norm": 0.5395639066708008, | |
| "learning_rate": 7.820124122961557e-05, | |
| "loss": 0.663609504699707, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.6911499096929561, | |
| "grad_norm": 0.5384530868694132, | |
| "learning_rate": 7.811898657588109e-05, | |
| "loss": 0.6645287275314331, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.6923540036122817, | |
| "grad_norm": 0.5220245375313282, | |
| "learning_rate": 7.803662047081323e-05, | |
| "loss": 0.741969883441925, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.6935580975316075, | |
| "grad_norm": 0.5126476994514748, | |
| "learning_rate": 7.795414324087515e-05, | |
| "loss": 0.7474612593650818, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.6947621914509332, | |
| "grad_norm": 0.5328593914899424, | |
| "learning_rate": 7.78715552129704e-05, | |
| "loss": 0.8475122451782227, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.6959662853702588, | |
| "grad_norm": 0.5116580634330516, | |
| "learning_rate": 7.778885671444169e-05, | |
| "loss": 0.5925725698471069, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.6971703792895846, | |
| "grad_norm": 0.5209404691403938, | |
| "learning_rate": 7.770604807306966e-05, | |
| "loss": 0.7660278081893921, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.6983744732089103, | |
| "grad_norm": 0.6419678559064697, | |
| "learning_rate": 7.762312961707141e-05, | |
| "loss": 0.8105254769325256, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.699578567128236, | |
| "grad_norm": 0.5291551771982791, | |
| "learning_rate": 7.754010167509935e-05, | |
| "loss": 0.7684112191200256, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.7007826610475617, | |
| "grad_norm": 0.5344021885735301, | |
| "learning_rate": 7.745696457623986e-05, | |
| "loss": 0.7174640893936157, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.7019867549668874, | |
| "grad_norm": 0.5768957782540212, | |
| "learning_rate": 7.73737186500119e-05, | |
| "loss": 0.8031843304634094, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.7031908488862131, | |
| "grad_norm": 0.5426844722988857, | |
| "learning_rate": 7.729036422636589e-05, | |
| "loss": 0.7134600281715393, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.7043949428055388, | |
| "grad_norm": 0.5030850249501789, | |
| "learning_rate": 7.720690163568214e-05, | |
| "loss": 0.6457868218421936, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.7055990367248646, | |
| "grad_norm": 0.6191772999257755, | |
| "learning_rate": 7.712333120876983e-05, | |
| "loss": 0.7789636850357056, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.7068031306441902, | |
| "grad_norm": 0.5321381054419203, | |
| "learning_rate": 7.703965327686544e-05, | |
| "loss": 0.6850986480712891, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.708007224563516, | |
| "grad_norm": 0.525345105210121, | |
| "learning_rate": 7.695586817163163e-05, | |
| "loss": 0.7084580659866333, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.7092113184828417, | |
| "grad_norm": 0.5395663278470533, | |
| "learning_rate": 7.68719762251558e-05, | |
| "loss": 0.6684824824333191, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.7104154124021673, | |
| "grad_norm": 0.5825678154513388, | |
| "learning_rate": 7.678797776994886e-05, | |
| "loss": 0.671932578086853, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.7116195063214931, | |
| "grad_norm": 0.5681195138990598, | |
| "learning_rate": 7.670387313894384e-05, | |
| "loss": 0.7614145278930664, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.7128236002408188, | |
| "grad_norm": 0.5747985438529684, | |
| "learning_rate": 7.661966266549463e-05, | |
| "loss": 0.6186734437942505, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.7140276941601444, | |
| "grad_norm": 0.615632973884984, | |
| "learning_rate": 7.653534668337463e-05, | |
| "loss": 0.7583650350570679, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.7152317880794702, | |
| "grad_norm": 0.5845374532491585, | |
| "learning_rate": 7.645092552677539e-05, | |
| "loss": 0.7116202116012573, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.7164358819987959, | |
| "grad_norm": 0.5194310981235325, | |
| "learning_rate": 7.636639953030541e-05, | |
| "loss": 0.724486231803894, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.7176399759181216, | |
| "grad_norm": 0.5271839381183305, | |
| "learning_rate": 7.628176902898863e-05, | |
| "loss": 0.7952111959457397, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.7188440698374473, | |
| "grad_norm": 0.5466556917732034, | |
| "learning_rate": 7.619703435826328e-05, | |
| "loss": 0.8017375469207764, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.720048163756773, | |
| "grad_norm": 0.5805731516392539, | |
| "learning_rate": 7.61121958539804e-05, | |
| "loss": 0.7721456289291382, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.7212522576760987, | |
| "grad_norm": 0.5750677094113634, | |
| "learning_rate": 7.602725385240268e-05, | |
| "loss": 0.8046045303344727, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.7224563515954244, | |
| "grad_norm": 0.5050120132500692, | |
| "learning_rate": 7.594220869020293e-05, | |
| "loss": 0.718019962310791, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7236604455147502, | |
| "grad_norm": 0.5775475591845226, | |
| "learning_rate": 7.585706070446288e-05, | |
| "loss": 0.681684672832489, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.7248645394340758, | |
| "grad_norm": 0.5650253516457611, | |
| "learning_rate": 7.577181023267185e-05, | |
| "loss": 0.8424392342567444, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.7260686333534015, | |
| "grad_norm": 0.5597171944868315, | |
| "learning_rate": 7.568645761272527e-05, | |
| "loss": 0.7912712693214417, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 0.6549911820787678, | |
| "learning_rate": 7.560100318292355e-05, | |
| "loss": 0.7041141390800476, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.7284768211920529, | |
| "grad_norm": 0.5358008356978227, | |
| "learning_rate": 7.551544728197057e-05, | |
| "loss": 0.7670320272445679, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.7296809151113787, | |
| "grad_norm": 0.4901100224928788, | |
| "learning_rate": 7.542979024897239e-05, | |
| "loss": 0.7009145021438599, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.7308850090307044, | |
| "grad_norm": 0.5905232471499013, | |
| "learning_rate": 7.534403242343595e-05, | |
| "loss": 0.7351090312004089, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.7320891029500302, | |
| "grad_norm": 0.5500568361307169, | |
| "learning_rate": 7.525817414526764e-05, | |
| "loss": 0.7852768301963806, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.7332931968693558, | |
| "grad_norm": 0.5278365030547066, | |
| "learning_rate": 7.517221575477209e-05, | |
| "loss": 0.7615916728973389, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.7344972907886815, | |
| "grad_norm": 0.5586068544844466, | |
| "learning_rate": 7.508615759265059e-05, | |
| "loss": 0.7317056655883789, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.7357013847080073, | |
| "grad_norm": 0.5567556937034017, | |
| "learning_rate": 7.500000000000001e-05, | |
| "loss": 0.6546356678009033, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.7369054786273329, | |
| "grad_norm": 0.5190616958413415, | |
| "learning_rate": 7.491374331831125e-05, | |
| "loss": 0.6745877861976624, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.7381095725466587, | |
| "grad_norm": 0.5155477634721394, | |
| "learning_rate": 7.482738788946799e-05, | |
| "loss": 0.6520372629165649, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.7393136664659844, | |
| "grad_norm": 0.5335041167853599, | |
| "learning_rate": 7.474093405574527e-05, | |
| "loss": 0.7490137219429016, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.74051776038531, | |
| "grad_norm": 0.7109509898226887, | |
| "learning_rate": 7.465438215980819e-05, | |
| "loss": 0.7756985425949097, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.7417218543046358, | |
| "grad_norm": 0.5955093151487647, | |
| "learning_rate": 7.456773254471053e-05, | |
| "loss": 0.7039395570755005, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.7429259482239615, | |
| "grad_norm": 1.7500643108130063, | |
| "learning_rate": 7.448098555389333e-05, | |
| "loss": 0.7679908871650696, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.7441300421432872, | |
| "grad_norm": 0.5689917526414457, | |
| "learning_rate": 7.439414153118364e-05, | |
| "loss": 0.7280757427215576, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.7453341360626129, | |
| "grad_norm": 0.5541120701637754, | |
| "learning_rate": 7.43072008207931e-05, | |
| "loss": 0.7403719425201416, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.7465382299819386, | |
| "grad_norm": 0.5572484684641944, | |
| "learning_rate": 7.422016376731658e-05, | |
| "loss": 0.6732151508331299, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.7477423239012643, | |
| "grad_norm": 0.6112781170222843, | |
| "learning_rate": 7.413303071573077e-05, | |
| "loss": 0.7835999727249146, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.74894641782059, | |
| "grad_norm": 0.5389600618950466, | |
| "learning_rate": 7.404580201139286e-05, | |
| "loss": 0.7648690938949585, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.7501505117399158, | |
| "grad_norm": 0.5604655172081615, | |
| "learning_rate": 7.395847800003925e-05, | |
| "loss": 0.7572316527366638, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.7513546056592414, | |
| "grad_norm": 0.6075035154379582, | |
| "learning_rate": 7.387105902778397e-05, | |
| "loss": 0.7459697127342224, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.7525586995785671, | |
| "grad_norm": 0.5278952094466931, | |
| "learning_rate": 7.378354544111755e-05, | |
| "loss": 0.6718525290489197, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.7537627934978929, | |
| "grad_norm": 0.5584227775244037, | |
| "learning_rate": 7.36959375869054e-05, | |
| "loss": 0.8716173768043518, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.7549668874172185, | |
| "grad_norm": 0.5530155578773712, | |
| "learning_rate": 7.360823581238672e-05, | |
| "loss": 0.7087276577949524, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.7561709813365443, | |
| "grad_norm": 0.5059165745763609, | |
| "learning_rate": 7.352044046517285e-05, | |
| "loss": 0.6190001964569092, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.75737507525587, | |
| "grad_norm": 0.5098108817484567, | |
| "learning_rate": 7.343255189324605e-05, | |
| "loss": 0.7209186553955078, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.7585791691751956, | |
| "grad_norm": 0.5806534819653945, | |
| "learning_rate": 7.334457044495811e-05, | |
| "loss": 0.7904737591743469, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.7597832630945214, | |
| "grad_norm": 0.5678941011790182, | |
| "learning_rate": 7.325649646902887e-05, | |
| "loss": 0.7746214270591736, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.7609873570138471, | |
| "grad_norm": 0.53499301398341, | |
| "learning_rate": 7.316833031454498e-05, | |
| "loss": 0.81767737865448, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.7621914509331728, | |
| "grad_norm": 0.6293538353705853, | |
| "learning_rate": 7.30800723309584e-05, | |
| "loss": 0.754915177822113, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.7633955448524985, | |
| "grad_norm": 0.5024549487657861, | |
| "learning_rate": 7.299172286808511e-05, | |
| "loss": 0.6980667114257812, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.7645996387718242, | |
| "grad_norm": 0.5808912591228869, | |
| "learning_rate": 7.290328227610362e-05, | |
| "loss": 0.7346571087837219, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.7658037326911499, | |
| "grad_norm": 0.5040533014385249, | |
| "learning_rate": 7.281475090555365e-05, | |
| "loss": 0.7573859095573425, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.7670078266104756, | |
| "grad_norm": 0.6000308844739162, | |
| "learning_rate": 7.272612910733475e-05, | |
| "loss": 0.734082818031311, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.7682119205298014, | |
| "grad_norm": 0.5704625126191725, | |
| "learning_rate": 7.263741723270486e-05, | |
| "loss": 0.7449295520782471, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.769416014449127, | |
| "grad_norm": 0.5466539737703434, | |
| "learning_rate": 7.254861563327896e-05, | |
| "loss": 0.6105548143386841, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.7706201083684527, | |
| "grad_norm": 0.5771368508028323, | |
| "learning_rate": 7.245972466102766e-05, | |
| "loss": 0.6553152799606323, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.7718242022877785, | |
| "grad_norm": 0.6128766973307496, | |
| "learning_rate": 7.237074466827579e-05, | |
| "loss": 0.75093674659729, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.7730282962071041, | |
| "grad_norm": 0.5821257642272554, | |
| "learning_rate": 7.228167600770101e-05, | |
| "loss": 0.6564959287643433, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.7742323901264299, | |
| "grad_norm": 0.5623716196101178, | |
| "learning_rate": 7.219251903233246e-05, | |
| "loss": 0.8025193214416504, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.7754364840457556, | |
| "grad_norm": 0.5696722543963053, | |
| "learning_rate": 7.210327409554926e-05, | |
| "loss": 0.7683711051940918, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.7766405779650812, | |
| "grad_norm": 0.5976679685755757, | |
| "learning_rate": 7.201394155107928e-05, | |
| "loss": 0.7830510139465332, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.777844671884407, | |
| "grad_norm": 0.5383400096273454, | |
| "learning_rate": 7.192452175299748e-05, | |
| "loss": 0.7331225275993347, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.7790487658037327, | |
| "grad_norm": 0.5804872750816046, | |
| "learning_rate": 7.183501505572478e-05, | |
| "loss": 0.7439867258071899, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.7802528597230584, | |
| "grad_norm": 0.5709872397445639, | |
| "learning_rate": 7.174542181402646e-05, | |
| "loss": 0.7664347887039185, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.7814569536423841, | |
| "grad_norm": 0.6093539887624945, | |
| "learning_rate": 7.165574238301085e-05, | |
| "loss": 0.7767341136932373, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.7826610475617098, | |
| "grad_norm": 0.6382372128542977, | |
| "learning_rate": 7.15659771181279e-05, | |
| "loss": 0.780250608921051, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.7838651414810355, | |
| "grad_norm": 0.5817645984828969, | |
| "learning_rate": 7.147612637516775e-05, | |
| "loss": 0.7129443883895874, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.7850692354003612, | |
| "grad_norm": 0.5829365363830763, | |
| "learning_rate": 7.138619051025935e-05, | |
| "loss": 0.7688045501708984, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.786273329319687, | |
| "grad_norm": 0.5931149520853964, | |
| "learning_rate": 7.129616987986905e-05, | |
| "loss": 0.7989938259124756, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.7874774232390126, | |
| "grad_norm": 0.6079357435489189, | |
| "learning_rate": 7.120606484079912e-05, | |
| "loss": 0.7031389474868774, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.7886815171583383, | |
| "grad_norm": 0.5836757921005798, | |
| "learning_rate": 7.111587575018648e-05, | |
| "loss": 0.7208900451660156, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.7898856110776641, | |
| "grad_norm": 0.5706981336520551, | |
| "learning_rate": 7.102560296550109e-05, | |
| "loss": 0.6882627010345459, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.7910897049969897, | |
| "grad_norm": 0.5801405663826479, | |
| "learning_rate": 7.093524684454471e-05, | |
| "loss": 0.7287724018096924, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.7922937989163155, | |
| "grad_norm": 0.5112728165578423, | |
| "learning_rate": 7.084480774544937e-05, | |
| "loss": 0.7950488328933716, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.7934978928356412, | |
| "grad_norm": 0.5906716881082211, | |
| "learning_rate": 7.075428602667602e-05, | |
| "loss": 0.8319511413574219, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.7947019867549668, | |
| "grad_norm": 0.552277030288998, | |
| "learning_rate": 7.066368204701306e-05, | |
| "loss": 0.7356606125831604, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.7959060806742926, | |
| "grad_norm": 0.5491799500162513, | |
| "learning_rate": 7.057299616557493e-05, | |
| "loss": 0.6787431240081787, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.7971101745936183, | |
| "grad_norm": 0.5447685751365484, | |
| "learning_rate": 7.048222874180072e-05, | |
| "loss": 0.6301302313804626, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.798314268512944, | |
| "grad_norm": 0.5349845222628344, | |
| "learning_rate": 7.039138013545265e-05, | |
| "loss": 0.7335139513015747, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.7995183624322697, | |
| "grad_norm": 0.5308636878826329, | |
| "learning_rate": 7.030045070661484e-05, | |
| "loss": 0.701374888420105, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.8007224563515954, | |
| "grad_norm": 0.5922816809783011, | |
| "learning_rate": 7.020944081569158e-05, | |
| "loss": 0.7663730978965759, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.8019265502709211, | |
| "grad_norm": 0.5605056625652637, | |
| "learning_rate": 7.011835082340625e-05, | |
| "loss": 0.7345573306083679, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.8031306441902468, | |
| "grad_norm": 0.5851819638490449, | |
| "learning_rate": 7.002718109079964e-05, | |
| "loss": 0.7375038266181946, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.8043347381095726, | |
| "grad_norm": 0.5416749643673157, | |
| "learning_rate": 6.993593197922852e-05, | |
| "loss": 0.7487154603004456, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.8055388320288982, | |
| "grad_norm": 0.5556726879895357, | |
| "learning_rate": 6.984460385036442e-05, | |
| "loss": 0.7427254915237427, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.8067429259482239, | |
| "grad_norm": 0.6007598069149666, | |
| "learning_rate": 6.975319706619197e-05, | |
| "loss": 0.7750675678253174, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.8079470198675497, | |
| "grad_norm": 0.528992540879131, | |
| "learning_rate": 6.966171198900761e-05, | |
| "loss": 0.6606633067131042, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.8091511137868754, | |
| "grad_norm": 0.5477173596363377, | |
| "learning_rate": 6.957014898141805e-05, | |
| "loss": 0.8152574300765991, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.8103552077062011, | |
| "grad_norm": 0.6121165350927734, | |
| "learning_rate": 6.947850840633892e-05, | |
| "loss": 0.6906253099441528, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.8115593016255268, | |
| "grad_norm": 0.6828142358549254, | |
| "learning_rate": 6.938679062699327e-05, | |
| "loss": 0.7197479009628296, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.8127633955448526, | |
| "grad_norm": 0.600638365736741, | |
| "learning_rate": 6.929499600691014e-05, | |
| "loss": 0.7559309005737305, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.8139674894641782, | |
| "grad_norm": 0.5741362889435193, | |
| "learning_rate": 6.92031249099232e-05, | |
| "loss": 0.7413439154624939, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.8151715833835039, | |
| "grad_norm": 0.5573466696881405, | |
| "learning_rate": 6.911117770016915e-05, | |
| "loss": 0.7520703077316284, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.8163756773028297, | |
| "grad_norm": 0.5530340502427105, | |
| "learning_rate": 6.901915474208644e-05, | |
| "loss": 0.7369641661643982, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.8175797712221553, | |
| "grad_norm": 0.6288799148019949, | |
| "learning_rate": 6.892705640041373e-05, | |
| "loss": 0.6862533688545227, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.818783865141481, | |
| "grad_norm": 0.5705708863157596, | |
| "learning_rate": 6.883488304018844e-05, | |
| "loss": 0.6596237421035767, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.8199879590608068, | |
| "grad_norm": 0.5418277571018996, | |
| "learning_rate": 6.874263502674538e-05, | |
| "loss": 0.7127683758735657, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.8211920529801324, | |
| "grad_norm": 0.5393357810954823, | |
| "learning_rate": 6.86503127257152e-05, | |
| "loss": 0.6932356357574463, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.8223961468994582, | |
| "grad_norm": 0.5697671614974222, | |
| "learning_rate": 6.855791650302305e-05, | |
| "loss": 0.7497154474258423, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.8236002408187839, | |
| "grad_norm": 0.5306596303608581, | |
| "learning_rate": 6.846544672488701e-05, | |
| "loss": 0.8150767087936401, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.8248043347381095, | |
| "grad_norm": 0.5534085843835471, | |
| "learning_rate": 6.837290375781678e-05, | |
| "loss": 0.7602214217185974, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.8260084286574353, | |
| "grad_norm": 0.5529982450683968, | |
| "learning_rate": 6.828028796861207e-05, | |
| "loss": 0.7561876177787781, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.827212522576761, | |
| "grad_norm": 0.5484429780368382, | |
| "learning_rate": 6.818759972436125e-05, | |
| "loss": 0.6599973440170288, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.8284166164960867, | |
| "grad_norm": 0.5776866825371967, | |
| "learning_rate": 6.809483939243992e-05, | |
| "loss": 0.7286278009414673, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.8296207104154124, | |
| "grad_norm": 0.5057955934734665, | |
| "learning_rate": 6.800200734050931e-05, | |
| "loss": 0.7581233978271484, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.8308248043347382, | |
| "grad_norm": 0.5760904906160742, | |
| "learning_rate": 6.790910393651502e-05, | |
| "loss": 0.7653100490570068, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.8320288982540638, | |
| "grad_norm": 0.5984616725447457, | |
| "learning_rate": 6.781612954868538e-05, | |
| "loss": 0.6928480863571167, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.8332329921733895, | |
| "grad_norm": 0.6145944772662347, | |
| "learning_rate": 6.77230845455301e-05, | |
| "loss": 0.8009054660797119, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.8344370860927153, | |
| "grad_norm": 0.6095913953550105, | |
| "learning_rate": 6.762996929583878e-05, | |
| "loss": 0.6875652074813843, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.8356411800120409, | |
| "grad_norm": 0.5090902182729284, | |
| "learning_rate": 6.753678416867944e-05, | |
| "loss": 0.5892850160598755, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.8368452739313667, | |
| "grad_norm": 0.5661616504114457, | |
| "learning_rate": 6.744352953339706e-05, | |
| "loss": 0.8156208395957947, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.8380493678506924, | |
| "grad_norm": 0.5471603662125316, | |
| "learning_rate": 6.735020575961213e-05, | |
| "loss": 0.7717173099517822, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.839253461770018, | |
| "grad_norm": 0.5608336825556588, | |
| "learning_rate": 6.725681321721916e-05, | |
| "loss": 0.7841690182685852, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.8404575556893438, | |
| "grad_norm": 0.5652594612436186, | |
| "learning_rate": 6.716335227638525e-05, | |
| "loss": 0.7450107932090759, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.8416616496086695, | |
| "grad_norm": 0.5184526157601729, | |
| "learning_rate": 6.706982330754858e-05, | |
| "loss": 0.7024678587913513, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.8428657435279951, | |
| "grad_norm": 0.5968287295635879, | |
| "learning_rate": 6.697622668141698e-05, | |
| "loss": 0.774450421333313, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.8440698374473209, | |
| "grad_norm": 0.5615067567077281, | |
| "learning_rate": 6.688256276896643e-05, | |
| "loss": 0.6903250217437744, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.8452739313666466, | |
| "grad_norm": 0.5210615684494945, | |
| "learning_rate": 6.678883194143962e-05, | |
| "loss": 0.7323881387710571, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.8464780252859723, | |
| "grad_norm": 0.5936435413810897, | |
| "learning_rate": 6.669503457034446e-05, | |
| "loss": 0.7613493204116821, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.847682119205298, | |
| "grad_norm": 0.5477651842051788, | |
| "learning_rate": 6.660117102745256e-05, | |
| "loss": 0.7256999015808105, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.8488862131246238, | |
| "grad_norm": 0.5931823138528517, | |
| "learning_rate": 6.650724168479789e-05, | |
| "loss": 0.6926670670509338, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.8500903070439494, | |
| "grad_norm": 0.5950387212624304, | |
| "learning_rate": 6.641324691467514e-05, | |
| "loss": 0.7022740840911865, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.8512944009632751, | |
| "grad_norm": 0.5226109301283741, | |
| "learning_rate": 6.63191870896384e-05, | |
| "loss": 0.7763382196426392, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.8524984948826009, | |
| "grad_norm": 0.586021288748115, | |
| "learning_rate": 6.622506258249956e-05, | |
| "loss": 0.6866122484207153, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.8537025888019265, | |
| "grad_norm": 0.5920787573331819, | |
| "learning_rate": 6.61308737663269e-05, | |
| "loss": 0.6568872928619385, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.8549066827212523, | |
| "grad_norm": 0.5991975319472767, | |
| "learning_rate": 6.60366210144436e-05, | |
| "loss": 0.718787670135498, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.856110776640578, | |
| "grad_norm": 0.5160750999640783, | |
| "learning_rate": 6.59423047004262e-05, | |
| "loss": 0.7689209580421448, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.8573148705599036, | |
| "grad_norm": 0.5494485853850889, | |
| "learning_rate": 6.584792519810325e-05, | |
| "loss": 0.6794394850730896, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.8585189644792294, | |
| "grad_norm": 0.5219228062580127, | |
| "learning_rate": 6.57534828815537e-05, | |
| "loss": 0.805499792098999, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.8597230583985551, | |
| "grad_norm": 0.6054632002933689, | |
| "learning_rate": 6.565897812510549e-05, | |
| "loss": 0.6810654997825623, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.8609271523178808, | |
| "grad_norm": 0.5467563560047504, | |
| "learning_rate": 6.556441130333403e-05, | |
| "loss": 0.782832682132721, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.8621312462372065, | |
| "grad_norm": 0.5102699042274735, | |
| "learning_rate": 6.546978279106074e-05, | |
| "loss": 0.7144448757171631, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.8633353401565322, | |
| "grad_norm": 0.5201991055470554, | |
| "learning_rate": 6.537509296335155e-05, | |
| "loss": 0.7194694876670837, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.8645394340758579, | |
| "grad_norm": 0.576056447234189, | |
| "learning_rate": 6.528034219551543e-05, | |
| "loss": 0.6905584335327148, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.8657435279951836, | |
| "grad_norm": 0.6464466934483798, | |
| "learning_rate": 6.518553086310285e-05, | |
| "loss": 0.6729954481124878, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.8669476219145094, | |
| "grad_norm": 0.5594342791837297, | |
| "learning_rate": 6.509065934190437e-05, | |
| "loss": 0.8003696203231812, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.868151715833835, | |
| "grad_norm": 0.601593304386689, | |
| "learning_rate": 6.499572800794911e-05, | |
| "loss": 0.6588794589042664, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.8693558097531607, | |
| "grad_norm": 0.5565882109192302, | |
| "learning_rate": 6.490073723750326e-05, | |
| "loss": 0.7036632895469666, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.8705599036724865, | |
| "grad_norm": 0.5612226727107508, | |
| "learning_rate": 6.480568740706856e-05, | |
| "loss": 0.621507465839386, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.8717639975918121, | |
| "grad_norm": 0.522246530372074, | |
| "learning_rate": 6.471057889338089e-05, | |
| "loss": 0.6701735258102417, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.8729680915111379, | |
| "grad_norm": 0.5426527244446435, | |
| "learning_rate": 6.461541207340866e-05, | |
| "loss": 0.7284058332443237, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.8741721854304636, | |
| "grad_norm": 0.5593465073284523, | |
| "learning_rate": 6.452018732435145e-05, | |
| "loss": 0.8232830762863159, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.8753762793497892, | |
| "grad_norm": 0.5342600006913032, | |
| "learning_rate": 6.442490502363838e-05, | |
| "loss": 0.7485530972480774, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.876580373269115, | |
| "grad_norm": 0.5447449625195914, | |
| "learning_rate": 6.432956554892675e-05, | |
| "loss": 0.7344603538513184, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.8777844671884407, | |
| "grad_norm": 0.5322477025460601, | |
| "learning_rate": 6.42341692781004e-05, | |
| "loss": 0.6612800359725952, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.8789885611077664, | |
| "grad_norm": 0.5831757188647365, | |
| "learning_rate": 6.413871658926833e-05, | |
| "loss": 0.6933920979499817, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.8801926550270921, | |
| "grad_norm": 0.5299832721565915, | |
| "learning_rate": 6.404320786076317e-05, | |
| "loss": 0.7439042329788208, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.8813967489464178, | |
| "grad_norm": 0.5695875228926351, | |
| "learning_rate": 6.39476434711396e-05, | |
| "loss": 0.7039676308631897, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.8826008428657435, | |
| "grad_norm": 0.5832038151328962, | |
| "learning_rate": 6.385202379917297e-05, | |
| "loss": 0.7815375924110413, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.8838049367850692, | |
| "grad_norm": 0.6835928908474225, | |
| "learning_rate": 6.375634922385775e-05, | |
| "loss": 0.7208431959152222, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.885009030704395, | |
| "grad_norm": 0.6587270192090067, | |
| "learning_rate": 6.366062012440599e-05, | |
| "loss": 0.7941803932189941, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.8862131246237207, | |
| "grad_norm": 1.7245765475827708, | |
| "learning_rate": 6.356483688024588e-05, | |
| "loss": 0.671486496925354, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.8874172185430463, | |
| "grad_norm": 0.5557071279745461, | |
| "learning_rate": 6.346899987102019e-05, | |
| "loss": 0.7135335206985474, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.8886213124623721, | |
| "grad_norm": 0.4961299048808865, | |
| "learning_rate": 6.337310947658478e-05, | |
| "loss": 0.714313805103302, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.8898254063816978, | |
| "grad_norm": 0.5141981286677907, | |
| "learning_rate": 6.327716607700719e-05, | |
| "loss": 0.6993675231933594, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.8910295003010235, | |
| "grad_norm": 0.5789671720716641, | |
| "learning_rate": 6.318117005256494e-05, | |
| "loss": 0.7922146320343018, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.8922335942203492, | |
| "grad_norm": 0.5613803261760942, | |
| "learning_rate": 6.308512178374419e-05, | |
| "loss": 0.6571409702301025, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.893437688139675, | |
| "grad_norm": 0.5669966206220561, | |
| "learning_rate": 6.298902165123815e-05, | |
| "loss": 0.688746452331543, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.8946417820590006, | |
| "grad_norm": 0.5047561684826636, | |
| "learning_rate": 6.289287003594564e-05, | |
| "loss": 0.7370304465293884, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.8958458759783263, | |
| "grad_norm": 0.5563270777067858, | |
| "learning_rate": 6.279666731896946e-05, | |
| "loss": 0.7728006839752197, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.8970499698976521, | |
| "grad_norm": 0.6115646897321452, | |
| "learning_rate": 6.270041388161503e-05, | |
| "loss": 0.8055559992790222, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.8982540638169777, | |
| "grad_norm": 0.5628035739900892, | |
| "learning_rate": 6.26041101053888e-05, | |
| "loss": 0.7253992557525635, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.8994581577363034, | |
| "grad_norm": 0.5149674970238032, | |
| "learning_rate": 6.250775637199661e-05, | |
| "loss": 0.692879319190979, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.9006622516556292, | |
| "grad_norm": 0.5056342892241322, | |
| "learning_rate": 6.241135306334254e-05, | |
| "loss": 0.6616844534873962, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.9018663455749548, | |
| "grad_norm": 0.5383802540672363, | |
| "learning_rate": 6.231490056152692e-05, | |
| "loss": 0.6623563766479492, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.9030704394942806, | |
| "grad_norm": 0.5815074149661886, | |
| "learning_rate": 6.221839924884527e-05, | |
| "loss": 0.752768874168396, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.9042745334136063, | |
| "grad_norm": 0.5414073533695667, | |
| "learning_rate": 6.21218495077864e-05, | |
| "loss": 0.731033205986023, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.9054786273329319, | |
| "grad_norm": 0.5447709356541176, | |
| "learning_rate": 6.20252517210312e-05, | |
| "loss": 0.6803133487701416, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.9066827212522577, | |
| "grad_norm": 0.5378240303467464, | |
| "learning_rate": 6.192860627145094e-05, | |
| "loss": 0.718227744102478, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.9078868151715834, | |
| "grad_norm": 0.5551071834692822, | |
| "learning_rate": 6.183191354210577e-05, | |
| "loss": 0.7990862727165222, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.9090909090909091, | |
| "grad_norm": 0.5172406034430397, | |
| "learning_rate": 6.173517391624331e-05, | |
| "loss": 0.6609753370285034, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.9102950030102348, | |
| "grad_norm": 0.5736445803572223, | |
| "learning_rate": 6.163838777729699e-05, | |
| "loss": 0.7769580483436584, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.9114990969295605, | |
| "grad_norm": 0.5097735409960078, | |
| "learning_rate": 6.154155550888466e-05, | |
| "loss": 0.6557226181030273, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.9127031908488862, | |
| "grad_norm": 0.6125357429244116, | |
| "learning_rate": 6.144467749480695e-05, | |
| "loss": 0.7219225168228149, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.9139072847682119, | |
| "grad_norm": 0.5709458640554498, | |
| "learning_rate": 6.134775411904581e-05, | |
| "loss": 0.6767279505729675, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.9151113786875377, | |
| "grad_norm": 0.5556672955667038, | |
| "learning_rate": 6.125078576576306e-05, | |
| "loss": 0.6707606911659241, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.9163154726068633, | |
| "grad_norm": 0.5996100778071993, | |
| "learning_rate": 6.115377281929867e-05, | |
| "loss": 0.7439418435096741, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.917519566526189, | |
| "grad_norm": 0.6026597255285467, | |
| "learning_rate": 6.105671566416947e-05, | |
| "loss": 0.7168517112731934, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.9187236604455148, | |
| "grad_norm": 0.5205823898203639, | |
| "learning_rate": 6.0959614685067444e-05, | |
| "loss": 0.6558011770248413, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.9199277543648404, | |
| "grad_norm": 0.5171716858750717, | |
| "learning_rate": 6.086247026685832e-05, | |
| "loss": 0.6740440726280212, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.9211318482841662, | |
| "grad_norm": 0.5352969310926585, | |
| "learning_rate": 6.0765282794579934e-05, | |
| "loss": 0.6668114066123962, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.9223359422034919, | |
| "grad_norm": 0.6505808967515653, | |
| "learning_rate": 6.066805265344084e-05, | |
| "loss": 0.8477667570114136, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.9235400361228175, | |
| "grad_norm": 0.6410375604839043, | |
| "learning_rate": 6.0570780228818705e-05, | |
| "loss": 0.7770013213157654, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.9247441300421433, | |
| "grad_norm": 0.5673822560541372, | |
| "learning_rate": 6.047346590625872e-05, | |
| "loss": 0.7360138893127441, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.925948223961469, | |
| "grad_norm": 0.5557896204884362, | |
| "learning_rate": 6.0376110071472234e-05, | |
| "loss": 0.6802233457565308, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.9271523178807947, | |
| "grad_norm": 0.52113730340189, | |
| "learning_rate": 6.027871311033506e-05, | |
| "loss": 0.7123029828071594, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.9283564118001204, | |
| "grad_norm": 0.5602500741181295, | |
| "learning_rate": 6.0181275408886064e-05, | |
| "loss": 0.7553243637084961, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.9295605057194462, | |
| "grad_norm": 0.5812719342758731, | |
| "learning_rate": 6.008379735332556e-05, | |
| "loss": 0.8514397144317627, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.9307645996387718, | |
| "grad_norm": 0.5301079378776876, | |
| "learning_rate": 5.998627933001381e-05, | |
| "loss": 0.7405595779418945, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.9319686935580975, | |
| "grad_norm": 0.5524992358350457, | |
| "learning_rate": 5.988872172546952e-05, | |
| "loss": 0.6502660512924194, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.9331727874774233, | |
| "grad_norm": 0.6259792675457059, | |
| "learning_rate": 5.979112492636824e-05, | |
| "loss": 0.7457438111305237, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.9343768813967489, | |
| "grad_norm": 0.5985676283825714, | |
| "learning_rate": 5.9693489319540906e-05, | |
| "loss": 0.7521538734436035, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.9355809753160746, | |
| "grad_norm": 0.5417907461104422, | |
| "learning_rate": 5.959581529197225e-05, | |
| "loss": 0.6997593641281128, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.9367850692354004, | |
| "grad_norm": 0.5797230846074595, | |
| "learning_rate": 5.9498103230799274e-05, | |
| "loss": 0.802728533744812, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.937989163154726, | |
| "grad_norm": 0.5235298813353869, | |
| "learning_rate": 5.940035352330975e-05, | |
| "loss": 0.755760133266449, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.9391932570740518, | |
| "grad_norm": 0.5919572468116551, | |
| "learning_rate": 5.9302566556940654e-05, | |
| "loss": 0.7489716410636902, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.9403973509933775, | |
| "grad_norm": 0.5506448343636869, | |
| "learning_rate": 5.9204742719276676e-05, | |
| "loss": 0.6037186980247498, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.9416014449127031, | |
| "grad_norm": 0.565008922820836, | |
| "learning_rate": 5.910688239804857e-05, | |
| "loss": 0.7465704679489136, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.9428055388320289, | |
| "grad_norm": 0.5693013431442876, | |
| "learning_rate": 5.900898598113177e-05, | |
| "loss": 0.7020426392555237, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.9440096327513546, | |
| "grad_norm": 0.5513848988904508, | |
| "learning_rate": 5.891105385654474e-05, | |
| "loss": 0.6874203681945801, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.9452137266706803, | |
| "grad_norm": 0.5317564624539481, | |
| "learning_rate": 5.881308641244747e-05, | |
| "loss": 0.6125369668006897, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.946417820590006, | |
| "grad_norm": 0.5804792703313383, | |
| "learning_rate": 5.871508403713997e-05, | |
| "loss": 0.7148929238319397, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.9476219145093318, | |
| "grad_norm": 0.5364815284480262, | |
| "learning_rate": 5.861704711906067e-05, | |
| "loss": 0.7103593349456787, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.9488260084286574, | |
| "grad_norm": 0.6044595597833547, | |
| "learning_rate": 5.8518976046784944e-05, | |
| "loss": 0.6965406537055969, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.9500301023479831, | |
| "grad_norm": 0.5212214917819449, | |
| "learning_rate": 5.842087120902351e-05, | |
| "loss": 0.7151713371276855, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.9512341962673089, | |
| "grad_norm": 0.5559680847527447, | |
| "learning_rate": 5.832273299462092e-05, | |
| "loss": 0.6795516014099121, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.9524382901866345, | |
| "grad_norm": 0.548780921578, | |
| "learning_rate": 5.8224561792554036e-05, | |
| "loss": 0.7641539573669434, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.9536423841059603, | |
| "grad_norm": 0.5608091695330082, | |
| "learning_rate": 5.812635799193046e-05, | |
| "loss": 0.7201707363128662, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.954846478025286, | |
| "grad_norm": 0.5826010098132147, | |
| "learning_rate": 5.802812198198699e-05, | |
| "loss": 0.7076339721679688, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.9560505719446116, | |
| "grad_norm": 0.5753158331848542, | |
| "learning_rate": 5.792985415208809e-05, | |
| "loss": 0.709479033946991, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.9572546658639374, | |
| "grad_norm": 0.5566546411115479, | |
| "learning_rate": 5.783155489172437e-05, | |
| "loss": 0.770931601524353, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.9584587597832631, | |
| "grad_norm": 0.599083438626808, | |
| "learning_rate": 5.773322459051098e-05, | |
| "loss": 0.6527454257011414, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.9596628537025887, | |
| "grad_norm": 0.6290348495481346, | |
| "learning_rate": 5.763486363818613e-05, | |
| "loss": 0.7638934254646301, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.9608669476219145, | |
| "grad_norm": 0.5846705546999525, | |
| "learning_rate": 5.75364724246095e-05, | |
| "loss": 0.8522746562957764, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.9620710415412402, | |
| "grad_norm": 0.5249812879164455, | |
| "learning_rate": 5.743805133976071e-05, | |
| "loss": 0.7056288123130798, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.963275135460566, | |
| "grad_norm": 0.5870806629180437, | |
| "learning_rate": 5.733960077373781e-05, | |
| "loss": 0.7362521886825562, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.9644792293798916, | |
| "grad_norm": 0.5992634913094184, | |
| "learning_rate": 5.7241121116755646e-05, | |
| "loss": 0.7288447618484497, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.9656833232992174, | |
| "grad_norm": 0.5469756167657702, | |
| "learning_rate": 5.714261275914442e-05, | |
| "loss": 0.7934062480926514, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.9668874172185431, | |
| "grad_norm": 0.6496356256264952, | |
| "learning_rate": 5.704407609134805e-05, | |
| "loss": 0.8269863724708557, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.9680915111378687, | |
| "grad_norm": 0.5037971848741302, | |
| "learning_rate": 5.694551150392271e-05, | |
| "loss": 0.7017319202423096, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.9692956050571945, | |
| "grad_norm": 0.5581416866027927, | |
| "learning_rate": 5.684691938753517e-05, | |
| "loss": 0.6454797387123108, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.9704996989765202, | |
| "grad_norm": 0.5226184565373623, | |
| "learning_rate": 5.674830013296137e-05, | |
| "loss": 0.6731082201004028, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.9717037928958459, | |
| "grad_norm": 0.5182166828064974, | |
| "learning_rate": 5.664965413108481e-05, | |
| "loss": 0.6522151231765747, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.9729078868151716, | |
| "grad_norm": 0.5685770883500431, | |
| "learning_rate": 5.655098177289496e-05, | |
| "loss": 0.7161366939544678, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.9741119807344973, | |
| "grad_norm": 0.5555404388639458, | |
| "learning_rate": 5.6452283449485774e-05, | |
| "loss": 0.7227890491485596, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.975316074653823, | |
| "grad_norm": 0.5622492078354654, | |
| "learning_rate": 5.635355955205416e-05, | |
| "loss": 0.7163590788841248, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.9765201685731487, | |
| "grad_norm": 0.4986516512531464, | |
| "learning_rate": 5.625481047189835e-05, | |
| "loss": 0.5898875594139099, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.9777242624924745, | |
| "grad_norm": 0.5559389458013995, | |
| "learning_rate": 5.6156036600416385e-05, | |
| "loss": 0.644203245639801, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.9789283564118001, | |
| "grad_norm": 0.5577887920278128, | |
| "learning_rate": 5.60572383291046e-05, | |
| "loss": 0.7715066075325012, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.9801324503311258, | |
| "grad_norm": 0.5694954155998118, | |
| "learning_rate": 5.595841604955601e-05, | |
| "loss": 0.7114702463150024, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.9813365442504516, | |
| "grad_norm": 0.5728384268780161, | |
| "learning_rate": 5.585957015345882e-05, | |
| "loss": 0.7500208020210266, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.9825406381697772, | |
| "grad_norm": 0.5754460458927252, | |
| "learning_rate": 5.5760701032594775e-05, | |
| "loss": 0.8016777634620667, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.983744732089103, | |
| "grad_norm": 0.5378230131701004, | |
| "learning_rate": 5.566180907883777e-05, | |
| "loss": 0.6485020518302917, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.9849488260084287, | |
| "grad_norm": 0.6119817727352167, | |
| "learning_rate": 5.556289468415212e-05, | |
| "loss": 0.7693502306938171, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.9861529199277543, | |
| "grad_norm": 0.5318784574757923, | |
| "learning_rate": 5.546395824059113e-05, | |
| "loss": 0.7719030380249023, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.9873570138470801, | |
| "grad_norm": 0.5867784164801246, | |
| "learning_rate": 5.536500014029547e-05, | |
| "loss": 0.7008310556411743, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.9885611077664058, | |
| "grad_norm": 0.5069862861453538, | |
| "learning_rate": 5.5266020775491654e-05, | |
| "loss": 0.6411416530609131, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.9897652016857315, | |
| "grad_norm": 0.5834044578675054, | |
| "learning_rate": 5.5167020538490514e-05, | |
| "loss": 0.717340350151062, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.9909692956050572, | |
| "grad_norm": 0.5379644400350362, | |
| "learning_rate": 5.506799982168553e-05, | |
| "loss": 0.6762273907661438, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.9921733895243829, | |
| "grad_norm": 0.529911656567608, | |
| "learning_rate": 5.496895901755145e-05, | |
| "loss": 0.7467926740646362, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.9933774834437086, | |
| "grad_norm": 0.5518942402789531, | |
| "learning_rate": 5.486989851864258e-05, | |
| "loss": 0.7194281220436096, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.9945815773630343, | |
| "grad_norm": 0.5498490180172129, | |
| "learning_rate": 5.47708187175913e-05, | |
| "loss": 0.7484912872314453, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.9957856712823601, | |
| "grad_norm": 0.5493140404581905, | |
| "learning_rate": 5.4671720007106507e-05, | |
| "loss": 0.667789876461029, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.9969897652016857, | |
| "grad_norm": 0.5254892526478596, | |
| "learning_rate": 5.4572602779972006e-05, | |
| "loss": 0.6494565606117249, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.9981938591210114, | |
| "grad_norm": 0.5252729042272404, | |
| "learning_rate": 5.447346742904508e-05, | |
| "loss": 0.6520144939422607, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.9993979530403372, | |
| "grad_norm": 0.5079551233357467, | |
| "learning_rate": 5.437431434725473e-05, | |
| "loss": 0.7297295928001404, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.7548489418432899, | |
| "learning_rate": 5.427514392760034e-05, | |
| "loss": 0.7330588102340698, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.0012040939193256, | |
| "grad_norm": 0.48262108535450576, | |
| "learning_rate": 5.417595656314997e-05, | |
| "loss": 0.6693322658538818, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.0024081878386515, | |
| "grad_norm": 0.5473512895400998, | |
| "learning_rate": 5.40767526470388e-05, | |
| "loss": 0.577499270439148, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.0036122817579771, | |
| "grad_norm": 0.5567998652776117, | |
| "learning_rate": 5.39775325724677e-05, | |
| "loss": 0.7308440208435059, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.0048163756773028, | |
| "grad_norm": 0.4710834471702117, | |
| "learning_rate": 5.3878296732701515e-05, | |
| "loss": 0.5792309045791626, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.0060204695966286, | |
| "grad_norm": 0.499021670786511, | |
| "learning_rate": 5.377904552106763e-05, | |
| "loss": 0.5693698525428772, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.0072245635159542, | |
| "grad_norm": 0.5243691549099818, | |
| "learning_rate": 5.367977933095428e-05, | |
| "loss": 0.7172250747680664, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.0084286574352799, | |
| "grad_norm": 0.5110304762735773, | |
| "learning_rate": 5.3580498555809163e-05, | |
| "loss": 0.6046251654624939, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.0096327513546057, | |
| "grad_norm": 0.5114687722760671, | |
| "learning_rate": 5.348120358913773e-05, | |
| "loss": 0.6995025277137756, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.0108368452739314, | |
| "grad_norm": 0.5912572180109141, | |
| "learning_rate": 5.338189482450167e-05, | |
| "loss": 0.616209089756012, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.012040939193257, | |
| "grad_norm": 0.5140400551380612, | |
| "learning_rate": 5.3282572655517416e-05, | |
| "loss": 0.5996021628379822, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.0132450331125828, | |
| "grad_norm": 0.5391679793572282, | |
| "learning_rate": 5.318323747585444e-05, | |
| "loss": 0.7038933038711548, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.0144491270319085, | |
| "grad_norm": 0.5193952980537593, | |
| "learning_rate": 5.308388967923391e-05, | |
| "loss": 0.5445797443389893, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.0156532209512341, | |
| "grad_norm": 0.5620145760135244, | |
| "learning_rate": 5.298452965942687e-05, | |
| "loss": 0.5499206781387329, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.01685731487056, | |
| "grad_norm": 0.5390761246484093, | |
| "learning_rate": 5.2885157810252915e-05, | |
| "loss": 0.4958648085594177, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.0180614087898856, | |
| "grad_norm": 0.5925470805964254, | |
| "learning_rate": 5.278577452557845e-05, | |
| "loss": 0.6143114566802979, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.0192655027092112, | |
| "grad_norm": 0.5570967510117002, | |
| "learning_rate": 5.2686380199315244e-05, | |
| "loss": 0.6284155249595642, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.020469596628537, | |
| "grad_norm": 0.5829210418808195, | |
| "learning_rate": 5.2586975225418854e-05, | |
| "loss": 0.6126077175140381, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.0216736905478627, | |
| "grad_norm": 0.663542970695612, | |
| "learning_rate": 5.248755999788699e-05, | |
| "loss": 0.5881949663162231, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.0228777844671884, | |
| "grad_norm": 0.508265452000293, | |
| "learning_rate": 5.2388134910758015e-05, | |
| "loss": 0.49111035466194153, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.0240818783865142, | |
| "grad_norm": 0.5947010917583964, | |
| "learning_rate": 5.2288700358109375e-05, | |
| "loss": 0.5964053869247437, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.0252859723058398, | |
| "grad_norm": 0.6478106165020875, | |
| "learning_rate": 5.218925673405607e-05, | |
| "loss": 0.5837634205818176, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.0264900662251655, | |
| "grad_norm": 0.5566597298872943, | |
| "learning_rate": 5.208980443274899e-05, | |
| "loss": 0.6266411542892456, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.0276941601444913, | |
| "grad_norm": 0.6119087058605345, | |
| "learning_rate": 5.199034384837345e-05, | |
| "loss": 0.5852352380752563, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.028898254063817, | |
| "grad_norm": 0.5803055714357055, | |
| "learning_rate": 5.189087537514763e-05, | |
| "loss": 0.6915392875671387, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.0301023479831426, | |
| "grad_norm": 0.667773625926801, | |
| "learning_rate": 5.179139940732091e-05, | |
| "loss": 0.6426236629486084, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.0313064419024685, | |
| "grad_norm": 0.6857940311337501, | |
| "learning_rate": 5.169191633917242e-05, | |
| "loss": 0.6479716300964355, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.032510535821794, | |
| "grad_norm": 0.5987681942027812, | |
| "learning_rate": 5.159242656500942e-05, | |
| "loss": 0.5855128765106201, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.0337146297411197, | |
| "grad_norm": 0.6431412742256677, | |
| "learning_rate": 5.149293047916576e-05, | |
| "loss": 0.6104673743247986, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.0349187236604456, | |
| "grad_norm": 0.6469125810000602, | |
| "learning_rate": 5.139342847600028e-05, | |
| "loss": 0.5815907120704651, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.0361228175797712, | |
| "grad_norm": 0.6322944781362547, | |
| "learning_rate": 5.12939209498953e-05, | |
| "loss": 0.6261488199234009, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.0373269114990968, | |
| "grad_norm": 0.5933004058886241, | |
| "learning_rate": 5.119440829525504e-05, | |
| "loss": 0.5986544489860535, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.0385310054184227, | |
| "grad_norm": 0.6017294433054711, | |
| "learning_rate": 5.1094890906504e-05, | |
| "loss": 0.5497260689735413, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.0397350993377483, | |
| "grad_norm": 0.6545529261884225, | |
| "learning_rate": 5.0995369178085484e-05, | |
| "loss": 0.6652652621269226, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.040939193257074, | |
| "grad_norm": 0.5884144368772644, | |
| "learning_rate": 5.0895843504460005e-05, | |
| "loss": 0.5726611614227295, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.0421432871763998, | |
| "grad_norm": 0.612888846785316, | |
| "learning_rate": 5.0796314280103664e-05, | |
| "loss": 0.6019963026046753, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.0433473810957254, | |
| "grad_norm": 0.6341773077017514, | |
| "learning_rate": 5.0696781899506686e-05, | |
| "loss": 0.6352479457855225, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.044551475015051, | |
| "grad_norm": 0.6050053156332715, | |
| "learning_rate": 5.059724675717177e-05, | |
| "loss": 0.6951148509979248, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.045755568934377, | |
| "grad_norm": 0.6092115250909115, | |
| "learning_rate": 5.049770924761259e-05, | |
| "loss": 0.609517514705658, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.0469596628537026, | |
| "grad_norm": 0.6093665118132492, | |
| "learning_rate": 5.039816976535219e-05, | |
| "loss": 0.6204075813293457, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.0481637567730282, | |
| "grad_norm": 0.5416011274391686, | |
| "learning_rate": 5.029862870492142e-05, | |
| "loss": 0.5534996390342712, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.049367850692354, | |
| "grad_norm": 0.5815173865104868, | |
| "learning_rate": 5.0199086460857406e-05, | |
| "loss": 0.6000134944915771, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.0505719446116797, | |
| "grad_norm": 0.6149736209209395, | |
| "learning_rate": 5.0099543427701956e-05, | |
| "loss": 0.6076500415802002, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.0517760385310053, | |
| "grad_norm": 0.6287626380440837, | |
| "learning_rate": 5e-05, | |
| "loss": 0.6155140399932861, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.0529801324503312, | |
| "grad_norm": 0.5717481586151558, | |
| "learning_rate": 4.9900456572298055e-05, | |
| "loss": 0.5186606645584106, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.0541842263696568, | |
| "grad_norm": 0.6226925894026947, | |
| "learning_rate": 4.980091353914259e-05, | |
| "loss": 0.6474608778953552, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.0553883202889824, | |
| "grad_norm": 0.6964903156335773, | |
| "learning_rate": 4.9701371295078603e-05, | |
| "loss": 0.6780328154563904, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.0565924142083083, | |
| "grad_norm": 0.5756463569010563, | |
| "learning_rate": 4.9601830234647824e-05, | |
| "loss": 0.6036153435707092, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.057796508127634, | |
| "grad_norm": 0.6520552306333011, | |
| "learning_rate": 4.950229075238742e-05, | |
| "loss": 0.6786141395568848, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.0590006020469596, | |
| "grad_norm": 0.5848176085725552, | |
| "learning_rate": 4.940275324282824e-05, | |
| "loss": 0.5802556872367859, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.0602046959662854, | |
| "grad_norm": 0.6222612587675024, | |
| "learning_rate": 4.930321810049334e-05, | |
| "loss": 0.551266074180603, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.061408789885611, | |
| "grad_norm": 0.6335949011450209, | |
| "learning_rate": 4.920368571989636e-05, | |
| "loss": 0.5886940956115723, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.062612883804937, | |
| "grad_norm": 0.5668099378869348, | |
| "learning_rate": 4.910415649554001e-05, | |
| "loss": 0.6376104950904846, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.0638169777242625, | |
| "grad_norm": 0.6343326763172722, | |
| "learning_rate": 4.900463082191452e-05, | |
| "loss": 0.5883640050888062, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.0650210716435882, | |
| "grad_norm": 0.6120672237185096, | |
| "learning_rate": 4.890510909349602e-05, | |
| "loss": 0.5998228788375854, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.0662251655629138, | |
| "grad_norm": 0.6466629735077495, | |
| "learning_rate": 4.880559170474499e-05, | |
| "loss": 0.6147492527961731, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.0674292594822397, | |
| "grad_norm": 0.6188477863366557, | |
| "learning_rate": 4.870607905010471e-05, | |
| "loss": 0.5260412096977234, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.0686333534015653, | |
| "grad_norm": 0.6531014500952979, | |
| "learning_rate": 4.860657152399973e-05, | |
| "loss": 0.6408224701881409, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.0698374473208911, | |
| "grad_norm": 0.6356448658174779, | |
| "learning_rate": 4.850706952083426e-05, | |
| "loss": 0.6420581340789795, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.0710415412402168, | |
| "grad_norm": 0.6638429959160749, | |
| "learning_rate": 4.840757343499059e-05, | |
| "loss": 0.5597264766693115, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.0722456351595424, | |
| "grad_norm": 0.6316267115529358, | |
| "learning_rate": 4.83080836608276e-05, | |
| "loss": 0.5977203845977783, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.073449729078868, | |
| "grad_norm": 0.6332312497061402, | |
| "learning_rate": 4.82086005926791e-05, | |
| "loss": 0.5730206370353699, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.074653822998194, | |
| "grad_norm": 0.6036387570021288, | |
| "learning_rate": 4.8109124624852386e-05, | |
| "loss": 0.5876260995864868, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.0758579169175195, | |
| "grad_norm": 0.6209977622958593, | |
| "learning_rate": 4.800965615162655e-05, | |
| "loss": 0.6396963596343994, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.0770620108368454, | |
| "grad_norm": 0.661525637487104, | |
| "learning_rate": 4.791019556725104e-05, | |
| "loss": 0.5814670920372009, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.078266104756171, | |
| "grad_norm": 0.6275103232059692, | |
| "learning_rate": 4.7810743265943955e-05, | |
| "loss": 0.6844623684883118, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.0794701986754967, | |
| "grad_norm": 0.6818646104901908, | |
| "learning_rate": 4.771129964189063e-05, | |
| "loss": 0.6540423631668091, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.0806742925948223, | |
| "grad_norm": 0.6899606024380505, | |
| "learning_rate": 4.7611865089242004e-05, | |
| "loss": 0.6030701398849487, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.0818783865141481, | |
| "grad_norm": 0.6156599517777511, | |
| "learning_rate": 4.751244000211302e-05, | |
| "loss": 0.6412866711616516, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.0830824804334738, | |
| "grad_norm": 0.5724892187762796, | |
| "learning_rate": 4.741302477458116e-05, | |
| "loss": 0.5549468398094177, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.0842865743527996, | |
| "grad_norm": 0.6770803099451937, | |
| "learning_rate": 4.731361980068476e-05, | |
| "loss": 0.5970389246940613, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.0854906682721253, | |
| "grad_norm": 0.6628879473222443, | |
| "learning_rate": 4.7214225474421556e-05, | |
| "loss": 0.6336625814437866, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.086694762191451, | |
| "grad_norm": 0.7118808898216501, | |
| "learning_rate": 4.7114842189747096e-05, | |
| "loss": 0.5842564702033997, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 1.0878988561107765, | |
| "grad_norm": 0.6808325766019172, | |
| "learning_rate": 4.701547034057313e-05, | |
| "loss": 0.6561811566352844, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.0891029500301024, | |
| "grad_norm": 0.6883360456461192, | |
| "learning_rate": 4.691611032076611e-05, | |
| "loss": 0.5866090059280396, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.090307043949428, | |
| "grad_norm": 0.594597876069142, | |
| "learning_rate": 4.6816762524145565e-05, | |
| "loss": 0.5930664539337158, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.0915111378687539, | |
| "grad_norm": 0.6540482900479212, | |
| "learning_rate": 4.67174273444826e-05, | |
| "loss": 0.6051517724990845, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 1.0927152317880795, | |
| "grad_norm": 0.6350023068043041, | |
| "learning_rate": 4.6618105175498336e-05, | |
| "loss": 0.5787694454193115, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.0939193257074051, | |
| "grad_norm": 0.6003852495110125, | |
| "learning_rate": 4.6518796410862284e-05, | |
| "loss": 0.474132776260376, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 1.095123419626731, | |
| "grad_norm": 0.6643572746823975, | |
| "learning_rate": 4.641950144419085e-05, | |
| "loss": 0.5836554765701294, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.0963275135460566, | |
| "grad_norm": 0.6346538134124894, | |
| "learning_rate": 4.632022066904573e-05, | |
| "loss": 0.632318913936615, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 1.0975316074653823, | |
| "grad_norm": 0.6506542788169488, | |
| "learning_rate": 4.622095447893238e-05, | |
| "loss": 0.6343862414360046, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.098735701384708, | |
| "grad_norm": 0.7055409962557284, | |
| "learning_rate": 4.612170326729849e-05, | |
| "loss": 0.5320890545845032, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 1.0999397953040337, | |
| "grad_norm": 0.6710841433347294, | |
| "learning_rate": 4.6022467427532316e-05, | |
| "loss": 0.6487467288970947, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.1011438892233594, | |
| "grad_norm": 0.6161335106985909, | |
| "learning_rate": 4.592324735296122e-05, | |
| "loss": 0.5364488363265991, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.1023479831426852, | |
| "grad_norm": 0.6567852610772589, | |
| "learning_rate": 4.582404343685005e-05, | |
| "loss": 0.5512426495552063, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.1035520770620109, | |
| "grad_norm": 0.7683181606199703, | |
| "learning_rate": 4.5724856072399666e-05, | |
| "loss": 0.6094173192977905, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 1.1047561709813365, | |
| "grad_norm": 0.6291985708032748, | |
| "learning_rate": 4.562568565274528e-05, | |
| "loss": 0.5325632691383362, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.1059602649006623, | |
| "grad_norm": 0.6285445914420223, | |
| "learning_rate": 4.552653257095495e-05, | |
| "loss": 0.6411327123641968, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 1.107164358819988, | |
| "grad_norm": 0.6778912339737762, | |
| "learning_rate": 4.5427397220028006e-05, | |
| "loss": 0.6963979005813599, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.1083684527393136, | |
| "grad_norm": 0.660219208281244, | |
| "learning_rate": 4.5328279992893505e-05, | |
| "loss": 0.5509607195854187, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 1.1095725466586395, | |
| "grad_norm": 0.5881472136676991, | |
| "learning_rate": 4.5229181282408705e-05, | |
| "loss": 0.5656740069389343, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.110776640577965, | |
| "grad_norm": 0.6285472375245976, | |
| "learning_rate": 4.513010148135743e-05, | |
| "loss": 0.5325478315353394, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 1.1119807344972907, | |
| "grad_norm": 0.6573263156024722, | |
| "learning_rate": 4.5031040982448564e-05, | |
| "loss": 0.568873405456543, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.1131848284166166, | |
| "grad_norm": 0.6375892634033876, | |
| "learning_rate": 4.493200017831448e-05, | |
| "loss": 0.5534123182296753, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.1143889223359422, | |
| "grad_norm": 0.6047252869641281, | |
| "learning_rate": 4.48329794615095e-05, | |
| "loss": 0.6317431330680847, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.1155930162552679, | |
| "grad_norm": 0.7466768979825729, | |
| "learning_rate": 4.4733979224508344e-05, | |
| "loss": 0.6036220192909241, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 1.1167971101745937, | |
| "grad_norm": 0.6792290210916058, | |
| "learning_rate": 4.4634999859704546e-05, | |
| "loss": 0.6010956764221191, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.1180012040939193, | |
| "grad_norm": 0.63632818802347, | |
| "learning_rate": 4.453604175940888e-05, | |
| "loss": 0.6214673519134521, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 1.119205298013245, | |
| "grad_norm": 0.5890975902667656, | |
| "learning_rate": 4.443710531584789e-05, | |
| "loss": 0.5909805297851562, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.1204093919325708, | |
| "grad_norm": 0.7758035748650709, | |
| "learning_rate": 4.433819092116223e-05, | |
| "loss": 0.6618773937225342, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 1.1216134858518965, | |
| "grad_norm": 0.6032633893730255, | |
| "learning_rate": 4.423929896740522e-05, | |
| "loss": 0.6046672463417053, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.122817579771222, | |
| "grad_norm": 0.5963867825539027, | |
| "learning_rate": 4.41404298465412e-05, | |
| "loss": 0.557878851890564, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 1.124021673690548, | |
| "grad_norm": 0.6278341782515403, | |
| "learning_rate": 4.4041583950444004e-05, | |
| "loss": 0.5768659710884094, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.1252257676098736, | |
| "grad_norm": 0.6692340070481475, | |
| "learning_rate": 4.3942761670895416e-05, | |
| "loss": 0.596519410610199, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.1264298615291992, | |
| "grad_norm": 0.6324155300577929, | |
| "learning_rate": 4.384396339958361e-05, | |
| "loss": 0.6091715097427368, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.127633955448525, | |
| "grad_norm": 0.6873308697173252, | |
| "learning_rate": 4.374518952810167e-05, | |
| "loss": 0.6679472923278809, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 1.1288380493678507, | |
| "grad_norm": 0.60347781133788, | |
| "learning_rate": 4.364644044794585e-05, | |
| "loss": 0.5072171688079834, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.1300421432871763, | |
| "grad_norm": 0.5958005061206001, | |
| "learning_rate": 4.354771655051424e-05, | |
| "loss": 0.45150619745254517, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 1.1312462372065022, | |
| "grad_norm": 0.6458018431543086, | |
| "learning_rate": 4.344901822710506e-05, | |
| "loss": 0.5927637815475464, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.1324503311258278, | |
| "grad_norm": 0.6299802211614689, | |
| "learning_rate": 4.33503458689152e-05, | |
| "loss": 0.6127684712409973, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 1.1336544250451535, | |
| "grad_norm": 0.6064791911526438, | |
| "learning_rate": 4.3251699867038634e-05, | |
| "loss": 0.5717106461524963, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.1348585189644793, | |
| "grad_norm": 0.5664911073371133, | |
| "learning_rate": 4.3153080612464835e-05, | |
| "loss": 0.5828589200973511, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 1.136062612883805, | |
| "grad_norm": 0.6168659838027578, | |
| "learning_rate": 4.305448849607731e-05, | |
| "loss": 0.6533054113388062, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.1372667068031306, | |
| "grad_norm": 0.6620892379351815, | |
| "learning_rate": 4.295592390865194e-05, | |
| "loss": 0.625812292098999, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.1384708007224564, | |
| "grad_norm": 0.627104880201231, | |
| "learning_rate": 4.28573872408556e-05, | |
| "loss": 0.6203385591506958, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.139674894641782, | |
| "grad_norm": 0.6520709720872566, | |
| "learning_rate": 4.2758878883244366e-05, | |
| "loss": 0.6454954147338867, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 1.1408789885611077, | |
| "grad_norm": 0.5670002197473883, | |
| "learning_rate": 4.2660399226262205e-05, | |
| "loss": 0.6776309013366699, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.1420830824804336, | |
| "grad_norm": 0.6651897379271099, | |
| "learning_rate": 4.256194866023929e-05, | |
| "loss": 0.6165192127227783, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 1.1432871763997592, | |
| "grad_norm": 0.6070318758880671, | |
| "learning_rate": 4.24635275753905e-05, | |
| "loss": 0.6247190833091736, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.1444912703190848, | |
| "grad_norm": 0.6594245265551396, | |
| "learning_rate": 4.236513636181389e-05, | |
| "loss": 0.6294478178024292, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 1.1456953642384107, | |
| "grad_norm": 0.6656388910182744, | |
| "learning_rate": 4.2266775409489023e-05, | |
| "loss": 0.6407076716423035, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.1468994581577363, | |
| "grad_norm": 0.6001528475820775, | |
| "learning_rate": 4.2168445108275636e-05, | |
| "loss": 0.5295635461807251, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 1.148103552077062, | |
| "grad_norm": 0.6623205303719578, | |
| "learning_rate": 4.2070145847911915e-05, | |
| "loss": 0.6363142132759094, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.1493076459963878, | |
| "grad_norm": 0.6595593517698526, | |
| "learning_rate": 4.197187801801301e-05, | |
| "loss": 0.5908467769622803, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 1.1505117399157134, | |
| "grad_norm": 0.6416992225837528, | |
| "learning_rate": 4.187364200806956e-05, | |
| "loss": 0.6078401803970337, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.151715833835039, | |
| "grad_norm": 0.6426590726024174, | |
| "learning_rate": 4.177543820744597e-05, | |
| "loss": 0.5659914016723633, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 1.152919927754365, | |
| "grad_norm": 0.5945058633012985, | |
| "learning_rate": 4.167726700537909e-05, | |
| "loss": 0.5272383689880371, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.1541240216736905, | |
| "grad_norm": 0.6517936478039313, | |
| "learning_rate": 4.157912879097651e-05, | |
| "loss": 0.6436958909034729, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 1.1553281155930162, | |
| "grad_norm": 0.6378579002832153, | |
| "learning_rate": 4.1481023953215074e-05, | |
| "loss": 0.6222982406616211, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.156532209512342, | |
| "grad_norm": 0.6050181971864277, | |
| "learning_rate": 4.1382952880939346e-05, | |
| "loss": 0.525058925151825, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 1.1577363034316677, | |
| "grad_norm": 0.6211097246331354, | |
| "learning_rate": 4.128491596286004e-05, | |
| "loss": 0.559781014919281, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.1589403973509933, | |
| "grad_norm": 0.6174026620410265, | |
| "learning_rate": 4.118691358755254e-05, | |
| "loss": 0.5967716574668884, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 1.1601444912703192, | |
| "grad_norm": 0.665933154143997, | |
| "learning_rate": 4.108894614345527e-05, | |
| "loss": 0.6097081303596497, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.1613485851896448, | |
| "grad_norm": 0.7893212528385332, | |
| "learning_rate": 4.0991014018868255e-05, | |
| "loss": 0.6505680084228516, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 1.1625526791089704, | |
| "grad_norm": 0.6246966439954491, | |
| "learning_rate": 4.0893117601951434e-05, | |
| "loss": 0.5621920228004456, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.1637567730282963, | |
| "grad_norm": 0.8222739403785936, | |
| "learning_rate": 4.079525728072334e-05, | |
| "loss": 0.5986034274101257, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 1.164960866947622, | |
| "grad_norm": 0.7050682371518708, | |
| "learning_rate": 4.0697433443059344e-05, | |
| "loss": 0.5873398780822754, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.1661649608669475, | |
| "grad_norm": 0.5796676608087229, | |
| "learning_rate": 4.059964647669025e-05, | |
| "loss": 0.5936042070388794, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 1.1673690547862734, | |
| "grad_norm": 0.787700686593201, | |
| "learning_rate": 4.050189676920075e-05, | |
| "loss": 0.5301788449287415, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.168573148705599, | |
| "grad_norm": 0.6443250874812158, | |
| "learning_rate": 4.0404184708027764e-05, | |
| "loss": 0.6244308948516846, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 1.1697772426249247, | |
| "grad_norm": 0.6420455168036301, | |
| "learning_rate": 4.03065106804591e-05, | |
| "loss": 0.5246211290359497, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.1709813365442505, | |
| "grad_norm": 0.6233558947698492, | |
| "learning_rate": 4.0208875073631767e-05, | |
| "loss": 0.583974301815033, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 1.1721854304635762, | |
| "grad_norm": 0.6674639893979444, | |
| "learning_rate": 4.01112782745305e-05, | |
| "loss": 0.6854992508888245, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.1733895243829018, | |
| "grad_norm": 0.6761020516050178, | |
| "learning_rate": 4.001372066998621e-05, | |
| "loss": 0.47961360216140747, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.1745936183022276, | |
| "grad_norm": 0.6937061007497107, | |
| "learning_rate": 3.9916202646674454e-05, | |
| "loss": 0.5944783091545105, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.1757977122215533, | |
| "grad_norm": 0.7056015316208057, | |
| "learning_rate": 3.981872459111394e-05, | |
| "loss": 0.5598882436752319, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 1.177001806140879, | |
| "grad_norm": 0.6404405450738602, | |
| "learning_rate": 3.9721286889664946e-05, | |
| "loss": 0.576660692691803, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.1782059000602048, | |
| "grad_norm": 0.7104592496085945, | |
| "learning_rate": 3.962388992852778e-05, | |
| "loss": 0.6423761248588562, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 1.1794099939795304, | |
| "grad_norm": 0.6594542729046524, | |
| "learning_rate": 3.9526534093741294e-05, | |
| "loss": 0.629199743270874, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.180614087898856, | |
| "grad_norm": 0.6630058301620763, | |
| "learning_rate": 3.94292197711813e-05, | |
| "loss": 0.5926947593688965, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 1.1818181818181819, | |
| "grad_norm": 0.6813208402691311, | |
| "learning_rate": 3.933194734655916e-05, | |
| "loss": 0.6430405378341675, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.1830222757375075, | |
| "grad_norm": 0.6509982974967221, | |
| "learning_rate": 3.923471720542007e-05, | |
| "loss": 0.6401696801185608, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 1.1842263696568331, | |
| "grad_norm": 0.6536388401019356, | |
| "learning_rate": 3.9137529733141705e-05, | |
| "loss": 0.6143403053283691, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.185430463576159, | |
| "grad_norm": 0.714504675861702, | |
| "learning_rate": 3.904038531493257e-05, | |
| "loss": 0.7203058004379272, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 1.1866345574954846, | |
| "grad_norm": 0.6173945491470922, | |
| "learning_rate": 3.894328433583053e-05, | |
| "loss": 0.6278570294380188, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.1878386514148103, | |
| "grad_norm": 0.6701137023889631, | |
| "learning_rate": 3.8846227180701335e-05, | |
| "loss": 0.6109538674354553, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 1.1890427453341361, | |
| "grad_norm": 0.5571428533929209, | |
| "learning_rate": 3.874921423423697e-05, | |
| "loss": 0.5311257839202881, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.1902468392534618, | |
| "grad_norm": 0.6933413663343239, | |
| "learning_rate": 3.86522458809542e-05, | |
| "loss": 0.6332836151123047, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 1.1914509331727874, | |
| "grad_norm": 0.5720889716251273, | |
| "learning_rate": 3.855532250519307e-05, | |
| "loss": 0.5061079263687134, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.1926550270921132, | |
| "grad_norm": 0.6337726108063414, | |
| "learning_rate": 3.845844449111535e-05, | |
| "loss": 0.6507538557052612, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 1.1938591210114389, | |
| "grad_norm": 0.6876564417388028, | |
| "learning_rate": 3.8361612222703015e-05, | |
| "loss": 0.603594183921814, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.1950632149307645, | |
| "grad_norm": 0.62693954157751, | |
| "learning_rate": 3.826482608375671e-05, | |
| "loss": 0.65156090259552, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 1.1962673088500904, | |
| "grad_norm": 0.6174813194954165, | |
| "learning_rate": 3.816808645789425e-05, | |
| "loss": 0.5988015532493591, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.197471402769416, | |
| "grad_norm": 0.6366707975431302, | |
| "learning_rate": 3.8071393728549074e-05, | |
| "loss": 0.6115190982818604, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 1.1986754966887416, | |
| "grad_norm": 0.5739976440086356, | |
| "learning_rate": 3.79747482789688e-05, | |
| "loss": 0.5073090195655823, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.1998795906080675, | |
| "grad_norm": 0.7299628970418212, | |
| "learning_rate": 3.787815049221361e-05, | |
| "loss": 0.6128576397895813, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 1.2010836845273931, | |
| "grad_norm": 0.5928975154363226, | |
| "learning_rate": 3.778160075115476e-05, | |
| "loss": 0.7040702104568481, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.2022877784467187, | |
| "grad_norm": 0.6764716165306945, | |
| "learning_rate": 3.768509943847309e-05, | |
| "loss": 0.6583726406097412, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 1.2034918723660446, | |
| "grad_norm": 0.7655399104406154, | |
| "learning_rate": 3.758864693665748e-05, | |
| "loss": 0.6048669815063477, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.2046959662853702, | |
| "grad_norm": 0.6929632755591547, | |
| "learning_rate": 3.749224362800338e-05, | |
| "loss": 0.5911441445350647, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 1.205900060204696, | |
| "grad_norm": 0.6678428093707727, | |
| "learning_rate": 3.739588989461123e-05, | |
| "loss": 0.5826472043991089, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.2071041541240217, | |
| "grad_norm": 0.5742909832682589, | |
| "learning_rate": 3.729958611838496e-05, | |
| "loss": 0.5282671451568604, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 1.2083082480433474, | |
| "grad_norm": 0.6593846672932838, | |
| "learning_rate": 3.720333268103055e-05, | |
| "loss": 0.6393536925315857, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.209512341962673, | |
| "grad_norm": 0.645048734233651, | |
| "learning_rate": 3.7107129964054366e-05, | |
| "loss": 0.5161780118942261, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 1.2107164358819988, | |
| "grad_norm": 0.6521270937616732, | |
| "learning_rate": 3.701097834876185e-05, | |
| "loss": 0.5652678608894348, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.2119205298013245, | |
| "grad_norm": 0.719291801367232, | |
| "learning_rate": 3.691487821625583e-05, | |
| "loss": 0.6383004188537598, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 1.2131246237206503, | |
| "grad_norm": 0.6723641088759535, | |
| "learning_rate": 3.6818829947435076e-05, | |
| "loss": 0.6614134311676025, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.214328717639976, | |
| "grad_norm": 0.6446497075244803, | |
| "learning_rate": 3.672283392299282e-05, | |
| "loss": 0.5248739719390869, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 1.2155328115593016, | |
| "grad_norm": 0.6777640011121145, | |
| "learning_rate": 3.66268905234152e-05, | |
| "loss": 0.6154916286468506, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.2167369054786272, | |
| "grad_norm": 0.6609459633009753, | |
| "learning_rate": 3.653100012897983e-05, | |
| "loss": 0.6184529066085815, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 1.217940999397953, | |
| "grad_norm": 0.6888385737563062, | |
| "learning_rate": 3.643516311975413e-05, | |
| "loss": 0.6395118236541748, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.2191450933172787, | |
| "grad_norm": 0.6490466754073637, | |
| "learning_rate": 3.633937987559402e-05, | |
| "loss": 0.5476089715957642, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 1.2203491872366046, | |
| "grad_norm": 0.7441213710432538, | |
| "learning_rate": 3.624365077614226e-05, | |
| "loss": 0.5452324748039246, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.2215532811559302, | |
| "grad_norm": 0.6645629091016498, | |
| "learning_rate": 3.614797620082703e-05, | |
| "loss": 0.5184590220451355, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.2227573750752558, | |
| "grad_norm": 0.7419621941545511, | |
| "learning_rate": 3.605235652886042e-05, | |
| "loss": 0.6348177194595337, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.2239614689945815, | |
| "grad_norm": 0.6707127536069359, | |
| "learning_rate": 3.595679213923685e-05, | |
| "loss": 0.4938550591468811, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 1.2251655629139073, | |
| "grad_norm": 0.6315829793918256, | |
| "learning_rate": 3.586128341073167e-05, | |
| "loss": 0.5749923586845398, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.226369656833233, | |
| "grad_norm": 0.6331477125983097, | |
| "learning_rate": 3.57658307218996e-05, | |
| "loss": 0.5916865468025208, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 1.2275737507525588, | |
| "grad_norm": 0.7846749860867426, | |
| "learning_rate": 3.567043445107326e-05, | |
| "loss": 0.5363922715187073, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.2287778446718844, | |
| "grad_norm": 0.6965711425754788, | |
| "learning_rate": 3.5575094976361625e-05, | |
| "loss": 0.6076053380966187, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 1.22998193859121, | |
| "grad_norm": 0.6685618608419817, | |
| "learning_rate": 3.5479812675648575e-05, | |
| "loss": 0.5980309844017029, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.2311860325105357, | |
| "grad_norm": 0.5858609515515356, | |
| "learning_rate": 3.5384587926591355e-05, | |
| "loss": 0.5327468514442444, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 1.2323901264298616, | |
| "grad_norm": 0.5918812910680888, | |
| "learning_rate": 3.5289421106619126e-05, | |
| "loss": 0.5000705718994141, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.2335942203491872, | |
| "grad_norm": 0.6210027581503564, | |
| "learning_rate": 3.519431259293147e-05, | |
| "loss": 0.5952237248420715, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.234798314268513, | |
| "grad_norm": 0.7034937909157433, | |
| "learning_rate": 3.509926276249676e-05, | |
| "loss": 0.6087906956672668, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.2360024081878387, | |
| "grad_norm": 0.6839405989170235, | |
| "learning_rate": 3.500427199205091e-05, | |
| "loss": 0.5325309038162231, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 1.2372065021071643, | |
| "grad_norm": 0.6182421717192286, | |
| "learning_rate": 3.4909340658095646e-05, | |
| "loss": 0.580743134021759, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.23841059602649, | |
| "grad_norm": 0.7459897660008593, | |
| "learning_rate": 3.4814469136897165e-05, | |
| "loss": 0.6250452995300293, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 1.2396146899458158, | |
| "grad_norm": 0.7099900777127174, | |
| "learning_rate": 3.471965780448461e-05, | |
| "loss": 0.6571633219718933, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.2408187838651414, | |
| "grad_norm": 0.6553277230992378, | |
| "learning_rate": 3.462490703664846e-05, | |
| "loss": 0.49589502811431885, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 1.2420228777844673, | |
| "grad_norm": 0.663723104863035, | |
| "learning_rate": 3.4530217208939274e-05, | |
| "loss": 0.5967923998832703, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.243226971703793, | |
| "grad_norm": 0.709133317311018, | |
| "learning_rate": 3.443558869666598e-05, | |
| "loss": 0.6661368012428284, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 1.2444310656231186, | |
| "grad_norm": 0.7061324636303485, | |
| "learning_rate": 3.434102187489451e-05, | |
| "loss": 0.6587533354759216, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.2456351595424442, | |
| "grad_norm": 0.6579702542967999, | |
| "learning_rate": 3.424651711844632e-05, | |
| "loss": 0.5834370851516724, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 1.24683925346177, | |
| "grad_norm": 0.6615924637591785, | |
| "learning_rate": 3.415207480189676e-05, | |
| "loss": 0.6136612892150879, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.2480433473810957, | |
| "grad_norm": 0.6772710951274478, | |
| "learning_rate": 3.405769529957381e-05, | |
| "loss": 0.6056623458862305, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 1.2492474413004215, | |
| "grad_norm": 0.642702675489295, | |
| "learning_rate": 3.396337898555642e-05, | |
| "loss": 0.6358840465545654, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.2504515352197472, | |
| "grad_norm": 0.9252100643775776, | |
| "learning_rate": 3.386912623367311e-05, | |
| "loss": 0.5783637166023254, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 1.2516556291390728, | |
| "grad_norm": 0.701983248076389, | |
| "learning_rate": 3.377493741750044e-05, | |
| "loss": 0.6720529198646545, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.2528597230583984, | |
| "grad_norm": 0.6812048584013172, | |
| "learning_rate": 3.36808129103616e-05, | |
| "loss": 0.6228716373443604, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 1.2540638169777243, | |
| "grad_norm": 0.6154647288628092, | |
| "learning_rate": 3.358675308532486e-05, | |
| "loss": 0.5664002895355225, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.25526791089705, | |
| "grad_norm": 0.6241040220254938, | |
| "learning_rate": 3.349275831520212e-05, | |
| "loss": 0.5217874050140381, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 1.2564720048163758, | |
| "grad_norm": 0.6562678876719462, | |
| "learning_rate": 3.339882897254746e-05, | |
| "loss": 0.5833320021629333, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.2576760987357014, | |
| "grad_norm": 0.6095915930331086, | |
| "learning_rate": 3.330496542965556e-05, | |
| "loss": 0.6384044885635376, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 1.258880192655027, | |
| "grad_norm": 0.6416377081327351, | |
| "learning_rate": 3.3211168058560386e-05, | |
| "loss": 0.5927673578262329, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.2600842865743527, | |
| "grad_norm": 0.6100247656923646, | |
| "learning_rate": 3.311743723103357e-05, | |
| "loss": 0.6136247515678406, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 1.2612883804936785, | |
| "grad_norm": 0.6317630200071538, | |
| "learning_rate": 3.302377331858302e-05, | |
| "loss": 0.6653052568435669, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.2624924744130042, | |
| "grad_norm": 0.6274741674474991, | |
| "learning_rate": 3.293017669245144e-05, | |
| "loss": 0.5540253520011902, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 1.26369656833233, | |
| "grad_norm": 0.6909319297738697, | |
| "learning_rate": 3.283664772361476e-05, | |
| "loss": 0.6327494382858276, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.2649006622516556, | |
| "grad_norm": 0.6952322182958984, | |
| "learning_rate": 3.274318678278085e-05, | |
| "loss": 0.6329002380371094, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 1.2661047561709813, | |
| "grad_norm": 0.6308869283921665, | |
| "learning_rate": 3.264979424038789e-05, | |
| "loss": 0.5057480335235596, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.267308850090307, | |
| "grad_norm": 0.6285730530315006, | |
| "learning_rate": 3.255647046660296e-05, | |
| "loss": 0.6263934969902039, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 1.2685129440096328, | |
| "grad_norm": 0.6400200018215665, | |
| "learning_rate": 3.246321583132058e-05, | |
| "loss": 0.6725963950157166, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.2697170379289584, | |
| "grad_norm": 0.7099091081910462, | |
| "learning_rate": 3.237003070416122e-05, | |
| "loss": 0.6018358469009399, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 1.2709211318482843, | |
| "grad_norm": 0.6135782078162696, | |
| "learning_rate": 3.22769154544699e-05, | |
| "loss": 0.5996497869491577, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.27212522576761, | |
| "grad_norm": 0.6866426223054635, | |
| "learning_rate": 3.2183870451314624e-05, | |
| "loss": 0.5664709806442261, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 1.2733293196869355, | |
| "grad_norm": 0.6090505212825614, | |
| "learning_rate": 3.2090896063485e-05, | |
| "loss": 0.5572994351387024, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.2745334136062612, | |
| "grad_norm": 0.6372391798232508, | |
| "learning_rate": 3.19979926594907e-05, | |
| "loss": 0.5810008645057678, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 1.275737507525587, | |
| "grad_norm": 0.648696717169655, | |
| "learning_rate": 3.190516060756009e-05, | |
| "loss": 0.5552839040756226, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.2769416014449126, | |
| "grad_norm": 0.6031159136239559, | |
| "learning_rate": 3.181240027563875e-05, | |
| "loss": 0.5894042253494263, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 1.2781456953642385, | |
| "grad_norm": 0.6774291943197286, | |
| "learning_rate": 3.171971203138795e-05, | |
| "loss": 0.678736686706543, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.2793497892835641, | |
| "grad_norm": 0.609896680228515, | |
| "learning_rate": 3.1627096242183243e-05, | |
| "loss": 0.5225570201873779, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 1.2805538832028898, | |
| "grad_norm": 0.679157586878992, | |
| "learning_rate": 3.1534553275112994e-05, | |
| "loss": 0.5883038640022278, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.2817579771222154, | |
| "grad_norm": 0.6905787372952888, | |
| "learning_rate": 3.144208349697696e-05, | |
| "loss": 0.6585335731506348, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 1.2829620710415413, | |
| "grad_norm": 0.6058564268171939, | |
| "learning_rate": 3.13496872742848e-05, | |
| "loss": 0.5555436611175537, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.2841661649608669, | |
| "grad_norm": 0.6887579235137145, | |
| "learning_rate": 3.125736497325464e-05, | |
| "loss": 0.6298503279685974, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 1.2853702588801927, | |
| "grad_norm": 0.6477445948330529, | |
| "learning_rate": 3.116511695981157e-05, | |
| "loss": 0.5965017676353455, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.2865743527995184, | |
| "grad_norm": 0.6675640245384982, | |
| "learning_rate": 3.107294359958628e-05, | |
| "loss": 0.5505380034446716, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 1.287778446718844, | |
| "grad_norm": 0.6437167909226186, | |
| "learning_rate": 3.098084525791356e-05, | |
| "loss": 0.6020991802215576, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.2889825406381699, | |
| "grad_norm": 0.6633990777410962, | |
| "learning_rate": 3.0888822299830854e-05, | |
| "loss": 0.552418053150177, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 1.2901866345574955, | |
| "grad_norm": 0.6046857201539841, | |
| "learning_rate": 3.079687509007682e-05, | |
| "loss": 0.5754314064979553, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.2913907284768211, | |
| "grad_norm": 0.6971859400045242, | |
| "learning_rate": 3.070500399308987e-05, | |
| "loss": 0.6145979762077332, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 1.292594822396147, | |
| "grad_norm": 0.6949536669306756, | |
| "learning_rate": 3.0613209373006745e-05, | |
| "loss": 0.6195294260978699, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.2937989163154726, | |
| "grad_norm": 0.6242972237847882, | |
| "learning_rate": 3.052149159366109e-05, | |
| "loss": 0.5844000577926636, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.2950030102347982, | |
| "grad_norm": 0.7035332273264987, | |
| "learning_rate": 3.0429851018581955e-05, | |
| "loss": 0.5531705617904663, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.296207104154124, | |
| "grad_norm": 0.632115068867665, | |
| "learning_rate": 3.0338288010992398e-05, | |
| "loss": 0.5232405066490173, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 1.2974111980734497, | |
| "grad_norm": 0.6125142431442002, | |
| "learning_rate": 3.024680293380804e-05, | |
| "loss": 0.53972327709198, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.2986152919927754, | |
| "grad_norm": 0.7166615692901409, | |
| "learning_rate": 3.0155396149635585e-05, | |
| "loss": 0.5764614343643188, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 1.2998193859121012, | |
| "grad_norm": 0.6590577657401502, | |
| "learning_rate": 3.0064068020771486e-05, | |
| "loss": 0.5957779884338379, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.3010234798314269, | |
| "grad_norm": 0.6383819520827908, | |
| "learning_rate": 2.9972818909200396e-05, | |
| "loss": 0.5310415029525757, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 1.3022275737507525, | |
| "grad_norm": 0.6264074933268168, | |
| "learning_rate": 2.988164917659375e-05, | |
| "loss": 0.5686416029930115, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.3034316676700783, | |
| "grad_norm": 0.5735237535889868, | |
| "learning_rate": 2.979055918430842e-05, | |
| "loss": 0.6153852939605713, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 1.304635761589404, | |
| "grad_norm": 0.6338428525876928, | |
| "learning_rate": 2.9699549293385176e-05, | |
| "loss": 0.5895254611968994, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.3058398555087296, | |
| "grad_norm": 0.6628514943014948, | |
| "learning_rate": 2.9608619864547337e-05, | |
| "loss": 0.5714896321296692, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 1.3070439494280555, | |
| "grad_norm": 0.6498256880211087, | |
| "learning_rate": 2.95177712581993e-05, | |
| "loss": 0.637000560760498, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.308248043347381, | |
| "grad_norm": 0.5805140898940456, | |
| "learning_rate": 2.9427003834425075e-05, | |
| "loss": 0.5090338587760925, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 1.3094521372667067, | |
| "grad_norm": 0.6568092367082321, | |
| "learning_rate": 2.9336317952986946e-05, | |
| "loss": 0.5969448089599609, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.3106562311860326, | |
| "grad_norm": 0.6422165737966679, | |
| "learning_rate": 2.924571397332398e-05, | |
| "loss": 0.6276159286499023, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 1.3118603251053582, | |
| "grad_norm": 0.6264592124277943, | |
| "learning_rate": 2.915519225455065e-05, | |
| "loss": 0.5930631756782532, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.3130644190246838, | |
| "grad_norm": 0.760451843468021, | |
| "learning_rate": 2.906475315545532e-05, | |
| "loss": 0.5920497179031372, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 1.3142685129440097, | |
| "grad_norm": 0.7376228210515698, | |
| "learning_rate": 2.8974397034498917e-05, | |
| "loss": 0.6203291416168213, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.3154726068633353, | |
| "grad_norm": 0.6311093043141208, | |
| "learning_rate": 2.8884124249813526e-05, | |
| "loss": 0.5911815166473389, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 1.316676700782661, | |
| "grad_norm": 0.6725958475028008, | |
| "learning_rate": 2.879393515920087e-05, | |
| "loss": 0.5948607325553894, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.3178807947019868, | |
| "grad_norm": 0.6026740962582275, | |
| "learning_rate": 2.8703830120130976e-05, | |
| "loss": 0.5619141459465027, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 1.3190848886213125, | |
| "grad_norm": 0.7327869849262203, | |
| "learning_rate": 2.8613809489740662e-05, | |
| "loss": 0.5664507150650024, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.320288982540638, | |
| "grad_norm": 0.6133018339124032, | |
| "learning_rate": 2.8523873624832247e-05, | |
| "loss": 0.4463905692100525, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 1.321493076459964, | |
| "grad_norm": 0.6412840762738142, | |
| "learning_rate": 2.8434022881872103e-05, | |
| "loss": 0.60805344581604, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.3226971703792896, | |
| "grad_norm": 0.6492164235458658, | |
| "learning_rate": 2.8344257616989144e-05, | |
| "loss": 0.5742907524108887, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 1.3239012642986152, | |
| "grad_norm": 0.634280196253906, | |
| "learning_rate": 2.8254578185973556e-05, | |
| "loss": 0.5517445206642151, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.325105358217941, | |
| "grad_norm": 0.608061491520655, | |
| "learning_rate": 2.8164984944275242e-05, | |
| "loss": 0.5493748784065247, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 1.3263094521372667, | |
| "grad_norm": 0.7070185171134912, | |
| "learning_rate": 2.8075478247002518e-05, | |
| "loss": 0.6650428175926208, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.3275135460565926, | |
| "grad_norm": 0.7362968775469271, | |
| "learning_rate": 2.7986058448920728e-05, | |
| "loss": 0.6279030442237854, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 1.3287176399759182, | |
| "grad_norm": 0.6587185876014878, | |
| "learning_rate": 2.7896725904450748e-05, | |
| "loss": 0.5652649402618408, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.3299217338952438, | |
| "grad_norm": 0.6879422504214547, | |
| "learning_rate": 2.7807480967667576e-05, | |
| "loss": 0.5989348888397217, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 1.3311258278145695, | |
| "grad_norm": 0.6023282107853354, | |
| "learning_rate": 2.7718323992299e-05, | |
| "loss": 0.5741807818412781, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.3323299217338953, | |
| "grad_norm": 0.6239731759231483, | |
| "learning_rate": 2.7629255331724225e-05, | |
| "loss": 0.5270892977714539, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 1.333534015653221, | |
| "grad_norm": 0.7508836444752833, | |
| "learning_rate": 2.7540275338972343e-05, | |
| "loss": 0.6074275374412537, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.3347381095725468, | |
| "grad_norm": 0.6746453730710107, | |
| "learning_rate": 2.7451384366721057e-05, | |
| "loss": 0.6220452189445496, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 1.3359422034918724, | |
| "grad_norm": 0.6766167757323907, | |
| "learning_rate": 2.7362582767295158e-05, | |
| "loss": 0.5820175409317017, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.337146297411198, | |
| "grad_norm": 0.690797272196758, | |
| "learning_rate": 2.7273870892665253e-05, | |
| "loss": 0.5957927107810974, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 1.3383503913305237, | |
| "grad_norm": 0.6430318040991272, | |
| "learning_rate": 2.718524909444635e-05, | |
| "loss": 0.5154853463172913, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.3395544852498495, | |
| "grad_norm": 0.6582915506900874, | |
| "learning_rate": 2.709671772389639e-05, | |
| "loss": 0.5981411933898926, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 1.3407585791691752, | |
| "grad_norm": 0.6590855324118988, | |
| "learning_rate": 2.7008277131914916e-05, | |
| "loss": 0.6747996211051941, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.341962673088501, | |
| "grad_norm": 0.6448337036105954, | |
| "learning_rate": 2.691992766904161e-05, | |
| "loss": 0.5976516008377075, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 1.3431667670078267, | |
| "grad_norm": 0.661465770553295, | |
| "learning_rate": 2.683166968545503e-05, | |
| "loss": 0.5724648833274841, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.3443708609271523, | |
| "grad_norm": 0.7965142724805433, | |
| "learning_rate": 2.6743503530971138e-05, | |
| "loss": 0.6514641046524048, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 1.345574954846478, | |
| "grad_norm": 0.6130309700978976, | |
| "learning_rate": 2.6655429555041922e-05, | |
| "loss": 0.524080753326416, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.3467790487658038, | |
| "grad_norm": 0.676879651641497, | |
| "learning_rate": 2.656744810675397e-05, | |
| "loss": 0.5969399809837341, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 1.3479831426851294, | |
| "grad_norm": 0.6443187750796282, | |
| "learning_rate": 2.6479559534827168e-05, | |
| "loss": 0.5908799171447754, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.3491872366044553, | |
| "grad_norm": 0.7293081468185466, | |
| "learning_rate": 2.6391764187613277e-05, | |
| "loss": 0.639958381652832, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 1.350391330523781, | |
| "grad_norm": 0.7245543963855401, | |
| "learning_rate": 2.6304062413094588e-05, | |
| "loss": 0.5507534742355347, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.3515954244431065, | |
| "grad_norm": 0.7138419286324111, | |
| "learning_rate": 2.6216454558882486e-05, | |
| "loss": 0.5698534846305847, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 1.3527995183624322, | |
| "grad_norm": 0.658594597820022, | |
| "learning_rate": 2.6128940972216044e-05, | |
| "loss": 0.6738436818122864, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.354003612281758, | |
| "grad_norm": 0.6684797667576053, | |
| "learning_rate": 2.604152199996077e-05, | |
| "loss": 0.5904508829116821, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.3552077062010837, | |
| "grad_norm": 0.6642004437577558, | |
| "learning_rate": 2.5954197988607133e-05, | |
| "loss": 0.6535474061965942, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.3564118001204095, | |
| "grad_norm": 0.6007990769985496, | |
| "learning_rate": 2.586696928426926e-05, | |
| "loss": 0.6121514439582825, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 1.3576158940397351, | |
| "grad_norm": 0.6336515572591528, | |
| "learning_rate": 2.5779836232683442e-05, | |
| "loss": 0.5832182765007019, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.3588199879590608, | |
| "grad_norm": 0.6421286075260746, | |
| "learning_rate": 2.5692799179206906e-05, | |
| "loss": 0.5742759704589844, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 1.3600240818783864, | |
| "grad_norm": 0.690150823325512, | |
| "learning_rate": 2.5605858468816358e-05, | |
| "loss": 0.580817461013794, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.3612281757977123, | |
| "grad_norm": 0.6896471517754647, | |
| "learning_rate": 2.5519014446106682e-05, | |
| "loss": 0.588418185710907, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 1.362432269717038, | |
| "grad_norm": 0.613216584761024, | |
| "learning_rate": 2.5432267455289503e-05, | |
| "loss": 0.5638326406478882, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.3636363636363638, | |
| "grad_norm": 0.6210439632532312, | |
| "learning_rate": 2.5345617840191828e-05, | |
| "loss": 0.5994534492492676, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 1.3648404575556894, | |
| "grad_norm": 0.6505033824932888, | |
| "learning_rate": 2.5259065944254746e-05, | |
| "loss": 0.6377952098846436, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.366044551475015, | |
| "grad_norm": 0.6974309419675122, | |
| "learning_rate": 2.5172612110532012e-05, | |
| "loss": 0.6890112161636353, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 1.3672486453943407, | |
| "grad_norm": 0.6202436590581667, | |
| "learning_rate": 2.5086256681688745e-05, | |
| "loss": 0.5471165180206299, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.3684527393136665, | |
| "grad_norm": 0.5797278347327742, | |
| "learning_rate": 2.500000000000001e-05, | |
| "loss": 0.5749981999397278, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 1.3696568332329921, | |
| "grad_norm": 0.6710778824322429, | |
| "learning_rate": 2.491384240734943e-05, | |
| "loss": 0.6004801988601685, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.370860927152318, | |
| "grad_norm": 0.6858811733202708, | |
| "learning_rate": 2.4827784245227937e-05, | |
| "loss": 0.6247427463531494, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 1.3720650210716436, | |
| "grad_norm": 0.6422633870461217, | |
| "learning_rate": 2.4741825854732344e-05, | |
| "loss": 0.574191689491272, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.3732691149909693, | |
| "grad_norm": 0.6357294018183007, | |
| "learning_rate": 2.4655967576564064e-05, | |
| "loss": 0.5395843386650085, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 1.374473208910295, | |
| "grad_norm": 0.6263451518446398, | |
| "learning_rate": 2.4570209751027623e-05, | |
| "loss": 0.5604453086853027, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.3756773028296208, | |
| "grad_norm": 0.678607658650321, | |
| "learning_rate": 2.4484552718029448e-05, | |
| "loss": 0.6286793947219849, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 1.3768813967489464, | |
| "grad_norm": 0.6734943023110295, | |
| "learning_rate": 2.439899681707646e-05, | |
| "loss": 0.6490384340286255, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.3780854906682722, | |
| "grad_norm": 0.6232256600206579, | |
| "learning_rate": 2.4313542387274717e-05, | |
| "loss": 0.56013023853302, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 1.3792895845875979, | |
| "grad_norm": 0.6512462577282983, | |
| "learning_rate": 2.4228189767328173e-05, | |
| "loss": 0.5262826085090637, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.3804936785069235, | |
| "grad_norm": 0.6534793889464704, | |
| "learning_rate": 2.4142939295537126e-05, | |
| "loss": 0.5833800435066223, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 1.3816977724262491, | |
| "grad_norm": 0.6376912481118475, | |
| "learning_rate": 2.405779130979709e-05, | |
| "loss": 0.5151797533035278, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.382901866345575, | |
| "grad_norm": 0.6433663845535533, | |
| "learning_rate": 2.3972746147597335e-05, | |
| "loss": 0.5034987926483154, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 1.3841059602649006, | |
| "grad_norm": 0.6387226460653036, | |
| "learning_rate": 2.388780414601959e-05, | |
| "loss": 0.6382157206535339, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.3853100541842265, | |
| "grad_norm": 0.6525422712893322, | |
| "learning_rate": 2.380296564173674e-05, | |
| "loss": 0.5377070903778076, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 1.3865141481035521, | |
| "grad_norm": 0.6361638026870527, | |
| "learning_rate": 2.3718230971011386e-05, | |
| "loss": 0.553045392036438, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.3877182420228777, | |
| "grad_norm": 0.6604901502553285, | |
| "learning_rate": 2.3633600469694606e-05, | |
| "loss": 0.5241578817367554, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 1.3889223359422034, | |
| "grad_norm": 0.6609889554265359, | |
| "learning_rate": 2.3549074473224612e-05, | |
| "loss": 0.5158299207687378, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.3901264298615292, | |
| "grad_norm": 0.681790546788809, | |
| "learning_rate": 2.3464653316625385e-05, | |
| "loss": 0.5271784067153931, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 1.3913305237808549, | |
| "grad_norm": 0.6429951344093262, | |
| "learning_rate": 2.3380337334505374e-05, | |
| "loss": 0.6050268411636353, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.3925346177001807, | |
| "grad_norm": 0.6927991853136467, | |
| "learning_rate": 2.3296126861056167e-05, | |
| "loss": 0.5803988575935364, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 1.3937387116195064, | |
| "grad_norm": 0.6355445702814094, | |
| "learning_rate": 2.3212022230051154e-05, | |
| "loss": 0.545645534992218, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.394942805538832, | |
| "grad_norm": 0.6992209580511681, | |
| "learning_rate": 2.3128023774844194e-05, | |
| "loss": 0.6576249003410339, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 1.3961468994581576, | |
| "grad_norm": 0.7005498664256508, | |
| "learning_rate": 2.3044131828368386e-05, | |
| "loss": 0.582336962223053, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.3973509933774835, | |
| "grad_norm": 0.70377493838526, | |
| "learning_rate": 2.2960346723134575e-05, | |
| "loss": 0.5340085029602051, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 1.398555087296809, | |
| "grad_norm": 0.6419337587745647, | |
| "learning_rate": 2.2876668791230193e-05, | |
| "loss": 0.6333436369895935, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.399759181216135, | |
| "grad_norm": 0.7263202551262201, | |
| "learning_rate": 2.2793098364317868e-05, | |
| "loss": 0.6011248826980591, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 1.4009632751354606, | |
| "grad_norm": 0.7532824340862169, | |
| "learning_rate": 2.2709635773634113e-05, | |
| "loss": 0.5780522227287292, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.4021673690547862, | |
| "grad_norm": 0.6334080866576622, | |
| "learning_rate": 2.2626281349988103e-05, | |
| "loss": 0.62692791223526, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 1.4033714629741119, | |
| "grad_norm": 0.6900123881398824, | |
| "learning_rate": 2.254303542376016e-05, | |
| "loss": 0.629398763179779, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.4045755568934377, | |
| "grad_norm": 0.8266154500851933, | |
| "learning_rate": 2.2459898324900662e-05, | |
| "loss": 0.5797542929649353, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 1.4057796508127633, | |
| "grad_norm": 0.6600289647001534, | |
| "learning_rate": 2.2376870382928607e-05, | |
| "loss": 0.5778429508209229, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.4069837447320892, | |
| "grad_norm": 0.6559688369671797, | |
| "learning_rate": 2.2293951926930356e-05, | |
| "loss": 0.5707594156265259, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 1.4081878386514148, | |
| "grad_norm": 0.6653841370747816, | |
| "learning_rate": 2.2211143285558312e-05, | |
| "loss": 0.6159984469413757, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.4093919325707405, | |
| "grad_norm": 0.7649472065378574, | |
| "learning_rate": 2.2128444787029618e-05, | |
| "loss": 0.6222115755081177, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 1.410596026490066, | |
| "grad_norm": 0.6796942807832598, | |
| "learning_rate": 2.204585675912486e-05, | |
| "loss": 0.6199472546577454, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.411800120409392, | |
| "grad_norm": 0.6199548741185632, | |
| "learning_rate": 2.1963379529186768e-05, | |
| "loss": 0.590892493724823, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 1.4130042143287176, | |
| "grad_norm": 0.6187634070466261, | |
| "learning_rate": 2.1881013424118922e-05, | |
| "loss": 0.5503128170967102, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.4142083082480434, | |
| "grad_norm": 0.6512488781347366, | |
| "learning_rate": 2.1798758770384442e-05, | |
| "loss": 0.5549368858337402, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 1.415412402167369, | |
| "grad_norm": 0.6347054155507705, | |
| "learning_rate": 2.171661589400471e-05, | |
| "loss": 0.6251223087310791, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.4166164960866947, | |
| "grad_norm": 0.6695819615416071, | |
| "learning_rate": 2.1634585120558078e-05, | |
| "loss": 0.5890984535217285, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 1.4178205900060203, | |
| "grad_norm": 0.6814695161224296, | |
| "learning_rate": 2.1552666775178548e-05, | |
| "loss": 0.5793978571891785, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.4190246839253462, | |
| "grad_norm": 0.6726937714934542, | |
| "learning_rate": 2.1470861182554534e-05, | |
| "loss": 0.6175557971000671, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 1.4202287778446718, | |
| "grad_norm": 0.6678641303443984, | |
| "learning_rate": 2.138916866692754e-05, | |
| "loss": 0.6240713596343994, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.4214328717639977, | |
| "grad_norm": 0.6201792755242529, | |
| "learning_rate": 2.1307589552090866e-05, | |
| "loss": 0.5818585157394409, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 1.4226369656833233, | |
| "grad_norm": 0.7015372658065818, | |
| "learning_rate": 2.1226124161388354e-05, | |
| "loss": 0.572142481803894, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.423841059602649, | |
| "grad_norm": 0.6082394247396891, | |
| "learning_rate": 2.1144772817713103e-05, | |
| "loss": 0.4766699969768524, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 1.4250451535219746, | |
| "grad_norm": 0.6387846065976666, | |
| "learning_rate": 2.106353584350616e-05, | |
| "loss": 0.6041740775108337, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.4262492474413004, | |
| "grad_norm": 0.5982939961143208, | |
| "learning_rate": 2.0982413560755272e-05, | |
| "loss": 0.5946816205978394, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 1.427453341360626, | |
| "grad_norm": 0.6688710695951081, | |
| "learning_rate": 2.0901406290993598e-05, | |
| "loss": 0.5802619457244873, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.428657435279952, | |
| "grad_norm": 0.7078249109081977, | |
| "learning_rate": 2.0820514355298432e-05, | |
| "loss": 0.6126466989517212, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 1.4298615291992776, | |
| "grad_norm": 0.6445823260166493, | |
| "learning_rate": 2.073973807428995e-05, | |
| "loss": 0.5297144651412964, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.4310656231186032, | |
| "grad_norm": 0.6820450966327305, | |
| "learning_rate": 2.0659077768129898e-05, | |
| "loss": 0.6424897909164429, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 1.4322697170379288, | |
| "grad_norm": 0.5940299387097965, | |
| "learning_rate": 2.0578533756520375e-05, | |
| "loss": 0.5096526741981506, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.4334738109572547, | |
| "grad_norm": 0.5993494873406653, | |
| "learning_rate": 2.049810635870251e-05, | |
| "loss": 0.5202600955963135, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 1.4346779048765803, | |
| "grad_norm": 0.7614707388132266, | |
| "learning_rate": 2.0417795893455265e-05, | |
| "loss": 0.6343950629234314, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.4358819987959062, | |
| "grad_norm": 0.653826639908725, | |
| "learning_rate": 2.0337602679094092e-05, | |
| "loss": 0.605528712272644, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 1.4370860927152318, | |
| "grad_norm": 0.7089308232671065, | |
| "learning_rate": 2.0257527033469735e-05, | |
| "loss": 0.6146092414855957, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.4382901866345574, | |
| "grad_norm": 0.6770424284620841, | |
| "learning_rate": 2.0177569273966945e-05, | |
| "loss": 0.6413626670837402, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 1.439494280553883, | |
| "grad_norm": 0.7544563351535553, | |
| "learning_rate": 2.009772971750322e-05, | |
| "loss": 0.5747498273849487, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.440698374473209, | |
| "grad_norm": 0.6243897855893447, | |
| "learning_rate": 2.0018008680527556e-05, | |
| "loss": 0.5720393657684326, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 1.4419024683925346, | |
| "grad_norm": 0.6904757741414711, | |
| "learning_rate": 1.9938406479019183e-05, | |
| "loss": 0.5748528838157654, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.4431065623118604, | |
| "grad_norm": 0.6133136479702628, | |
| "learning_rate": 1.985892342848633e-05, | |
| "loss": 0.5648344159126282, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 1.444310656231186, | |
| "grad_norm": 0.692341065686183, | |
| "learning_rate": 1.9779559843964957e-05, | |
| "loss": 0.5337613224983215, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.4455147501505117, | |
| "grad_norm": 0.6932538183790291, | |
| "learning_rate": 1.9700316040017515e-05, | |
| "loss": 0.5030450224876404, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 1.4467188440698375, | |
| "grad_norm": 0.6085144049389716, | |
| "learning_rate": 1.9621192330731684e-05, | |
| "loss": 0.5549513697624207, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.4479229379891632, | |
| "grad_norm": 0.7071743976330627, | |
| "learning_rate": 1.954218902971917e-05, | |
| "loss": 0.6263749599456787, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 1.4491270319084888, | |
| "grad_norm": 0.6532630278934997, | |
| "learning_rate": 1.9463306450114416e-05, | |
| "loss": 0.5744891166687012, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.4503311258278146, | |
| "grad_norm": 0.6910032058096278, | |
| "learning_rate": 1.938454490457337e-05, | |
| "loss": 0.6153848767280579, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 1.4515352197471403, | |
| "grad_norm": 0.6854934057332991, | |
| "learning_rate": 1.9305904705272275e-05, | |
| "loss": 0.6504592299461365, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.452739313666466, | |
| "grad_norm": 0.6844756673065995, | |
| "learning_rate": 1.922738616390639e-05, | |
| "loss": 0.6322448253631592, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 1.4539434075857918, | |
| "grad_norm": 0.5726761208441021, | |
| "learning_rate": 1.9148989591688786e-05, | |
| "loss": 0.5301163792610168, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.4551475015051174, | |
| "grad_norm": 0.6499098121575463, | |
| "learning_rate": 1.9070715299349105e-05, | |
| "loss": 0.5792317390441895, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 1.456351595424443, | |
| "grad_norm": 0.6294029092092295, | |
| "learning_rate": 1.8992563597132323e-05, | |
| "loss": 0.5840668082237244, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.457555689343769, | |
| "grad_norm": 0.6288063174886138, | |
| "learning_rate": 1.8914534794797513e-05, | |
| "loss": 0.5666556358337402, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 1.4587597832630945, | |
| "grad_norm": 0.6811900356717014, | |
| "learning_rate": 1.8836629201616635e-05, | |
| "loss": 0.6121422648429871, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.4599638771824202, | |
| "grad_norm": 0.7039207316933149, | |
| "learning_rate": 1.8758847126373303e-05, | |
| "loss": 0.5998222827911377, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 1.461167971101746, | |
| "grad_norm": 0.6245939644307974, | |
| "learning_rate": 1.8681188877361555e-05, | |
| "loss": 0.5631198883056641, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.4623720650210716, | |
| "grad_norm": 0.6657764395909689, | |
| "learning_rate": 1.8603654762384642e-05, | |
| "loss": 0.5070215463638306, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 1.4635761589403973, | |
| "grad_norm": 0.6946821767356586, | |
| "learning_rate": 1.85262450887538e-05, | |
| "loss": 0.5660195350646973, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.4647802528597231, | |
| "grad_norm": 0.6486254208618857, | |
| "learning_rate": 1.8448960163287034e-05, | |
| "loss": 0.6307017207145691, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 1.4659843467790488, | |
| "grad_norm": 0.651794455656358, | |
| "learning_rate": 1.83718002923079e-05, | |
| "loss": 0.6030329465866089, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.4671884406983744, | |
| "grad_norm": 0.7355979968125673, | |
| "learning_rate": 1.8294765781644285e-05, | |
| "loss": 0.6677907109260559, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 1.4683925346177003, | |
| "grad_norm": 0.6991027780768566, | |
| "learning_rate": 1.821785693662724e-05, | |
| "loss": 0.5083788633346558, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.4695966285370259, | |
| "grad_norm": 0.6647763000946036, | |
| "learning_rate": 1.8141074062089657e-05, | |
| "loss": 0.603174090385437, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 1.4708007224563515, | |
| "grad_norm": 0.7002973158395647, | |
| "learning_rate": 1.8064417462365226e-05, | |
| "loss": 0.5698137283325195, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.4720048163756774, | |
| "grad_norm": 0.5833475144603689, | |
| "learning_rate": 1.798788744128707e-05, | |
| "loss": 0.5906316637992859, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 1.473208910295003, | |
| "grad_norm": 0.6447305050041968, | |
| "learning_rate": 1.791148430218666e-05, | |
| "loss": 0.6068233847618103, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.4744130042143286, | |
| "grad_norm": 0.6277096478361395, | |
| "learning_rate": 1.7835208347892535e-05, | |
| "loss": 0.5847250819206238, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 1.4756170981336545, | |
| "grad_norm": 0.7414175683459697, | |
| "learning_rate": 1.775905988072909e-05, | |
| "loss": 0.5115228295326233, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.4768211920529801, | |
| "grad_norm": 0.697043493410439, | |
| "learning_rate": 1.768303920251551e-05, | |
| "loss": 0.5767746567726135, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 1.4780252859723058, | |
| "grad_norm": 0.6337907702044968, | |
| "learning_rate": 1.7607146614564418e-05, | |
| "loss": 0.5868975520133972, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.4792293798916316, | |
| "grad_norm": 0.6840890488968902, | |
| "learning_rate": 1.753138241768075e-05, | |
| "loss": 0.5457339882850647, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 1.4804334738109572, | |
| "grad_norm": 0.6499123239023711, | |
| "learning_rate": 1.7455746912160564e-05, | |
| "loss": 0.551403284072876, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.4816375677302829, | |
| "grad_norm": 0.6101386305043538, | |
| "learning_rate": 1.7380240397789836e-05, | |
| "loss": 0.5619639158248901, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 1.4828416616496087, | |
| "grad_norm": 0.7183606920392251, | |
| "learning_rate": 1.7304863173843283e-05, | |
| "loss": 0.5660442113876343, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.4840457555689344, | |
| "grad_norm": 0.6850065823958903, | |
| "learning_rate": 1.722961553908318e-05, | |
| "loss": 0.6238867044448853, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 1.4852498494882602, | |
| "grad_norm": 0.716310161029593, | |
| "learning_rate": 1.7154497791758157e-05, | |
| "loss": 0.6281149387359619, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.4864539434075859, | |
| "grad_norm": 0.6551910228833004, | |
| "learning_rate": 1.707951022960199e-05, | |
| "loss": 0.6366140246391296, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 1.4876580373269115, | |
| "grad_norm": 0.6875009342751863, | |
| "learning_rate": 1.700465314983254e-05, | |
| "loss": 0.5912625789642334, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.4888621312462371, | |
| "grad_norm": 0.7103107098011074, | |
| "learning_rate": 1.6929926849150428e-05, | |
| "loss": 0.507576584815979, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 1.490066225165563, | |
| "grad_norm": 0.7785689863841994, | |
| "learning_rate": 1.6855331623737958e-05, | |
| "loss": 0.5702663064002991, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.4912703190848886, | |
| "grad_norm": 0.7085393061941329, | |
| "learning_rate": 1.6780867769257898e-05, | |
| "loss": 0.6586170792579651, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 1.4924744130042145, | |
| "grad_norm": 0.6929869277646624, | |
| "learning_rate": 1.6706535580852267e-05, | |
| "loss": 0.6001737117767334, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.49367850692354, | |
| "grad_norm": 0.619473871811379, | |
| "learning_rate": 1.6632335353141333e-05, | |
| "loss": 0.5791480541229248, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 1.4948826008428657, | |
| "grad_norm": 0.7486550213430624, | |
| "learning_rate": 1.6558267380222224e-05, | |
| "loss": 0.6321637630462646, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.4960866947621914, | |
| "grad_norm": 0.6112954966976807, | |
| "learning_rate": 1.6484331955667947e-05, | |
| "loss": 0.5635457634925842, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 1.4972907886815172, | |
| "grad_norm": 0.7108878803102963, | |
| "learning_rate": 1.6410529372526057e-05, | |
| "loss": 0.7190176248550415, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.4984948826008428, | |
| "grad_norm": 0.6658849135083088, | |
| "learning_rate": 1.6336859923317643e-05, | |
| "loss": 0.5681280493736267, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 1.4996989765201687, | |
| "grad_norm": 0.6118879060848942, | |
| "learning_rate": 1.6263323900036126e-05, | |
| "loss": 0.5625150203704834, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.5009030704394943, | |
| "grad_norm": 0.643240167423122, | |
| "learning_rate": 1.6189921594146046e-05, | |
| "loss": 0.5696181058883667, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 1.50210716435882, | |
| "grad_norm": 0.5934555486357125, | |
| "learning_rate": 1.6116653296581975e-05, | |
| "loss": 0.5061366558074951, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.5033112582781456, | |
| "grad_norm": 0.6301639218719404, | |
| "learning_rate": 1.6043519297747285e-05, | |
| "loss": 0.5271162986755371, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 1.5045153521974715, | |
| "grad_norm": 0.6558350418754424, | |
| "learning_rate": 1.5970519887513073e-05, | |
| "loss": 0.5793251991271973, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.505719446116797, | |
| "grad_norm": 0.6166848173267853, | |
| "learning_rate": 1.5897655355217038e-05, | |
| "loss": 0.49026674032211304, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 1.506923540036123, | |
| "grad_norm": 0.639230504451563, | |
| "learning_rate": 1.5824925989662216e-05, | |
| "loss": 0.4468444585800171, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.5081276339554486, | |
| "grad_norm": 0.6279029266897213, | |
| "learning_rate": 1.5752332079115932e-05, | |
| "loss": 0.5121999979019165, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 1.5093317278747742, | |
| "grad_norm": 0.6897589017387046, | |
| "learning_rate": 1.5679873911308597e-05, | |
| "loss": 0.6195895075798035, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.5105358217940998, | |
| "grad_norm": 0.6405551867527671, | |
| "learning_rate": 1.56075517734326e-05, | |
| "loss": 0.6228514909744263, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 1.5117399157134257, | |
| "grad_norm": 0.6712649126230317, | |
| "learning_rate": 1.5535365952141233e-05, | |
| "loss": 0.626694917678833, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.5129440096327513, | |
| "grad_norm": 0.6944075950230268, | |
| "learning_rate": 1.5463316733547434e-05, | |
| "loss": 0.6293349266052246, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 1.5141481035520772, | |
| "grad_norm": 0.6474800499836418, | |
| "learning_rate": 1.5391404403222676e-05, | |
| "loss": 0.4614093601703644, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.5153521974714028, | |
| "grad_norm": 0.6354596486501298, | |
| "learning_rate": 1.5319629246195917e-05, | |
| "loss": 0.5340675115585327, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 1.5165562913907285, | |
| "grad_norm": 0.76231174162437, | |
| "learning_rate": 1.5247991546952428e-05, | |
| "loss": 0.5091834664344788, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.517760385310054, | |
| "grad_norm": 0.7027773645202476, | |
| "learning_rate": 1.5176491589432628e-05, | |
| "loss": 0.5760695338249207, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 1.51896447922938, | |
| "grad_norm": 0.6909468225730208, | |
| "learning_rate": 1.5105129657031009e-05, | |
| "loss": 0.5917767882347107, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.5201685731487056, | |
| "grad_norm": 0.6413099265304232, | |
| "learning_rate": 1.5033906032594958e-05, | |
| "loss": 0.48429784178733826, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 1.5213726670680314, | |
| "grad_norm": 0.7402093135988407, | |
| "learning_rate": 1.4962820998423683e-05, | |
| "loss": 0.5737481117248535, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.522576760987357, | |
| "grad_norm": 0.6633749969252996, | |
| "learning_rate": 1.4891874836267127e-05, | |
| "loss": 0.5403265357017517, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 1.5237808549066827, | |
| "grad_norm": 0.6703832368863357, | |
| "learning_rate": 1.4821067827324753e-05, | |
| "loss": 0.6162956953048706, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.5249849488260083, | |
| "grad_norm": 0.6913767161943216, | |
| "learning_rate": 1.4750400252244511e-05, | |
| "loss": 0.5615054965019226, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 1.5261890427453342, | |
| "grad_norm": 0.6431708429026083, | |
| "learning_rate": 1.4679872391121651e-05, | |
| "loss": 0.6342799663543701, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.5273931366646598, | |
| "grad_norm": 0.6246696358486011, | |
| "learning_rate": 1.4609484523497696e-05, | |
| "loss": 0.4823196232318878, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 1.5285972305839857, | |
| "grad_norm": 0.6643995241856853, | |
| "learning_rate": 1.4539236928359318e-05, | |
| "loss": 0.5989305377006531, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.5298013245033113, | |
| "grad_norm": 0.6970680691115482, | |
| "learning_rate": 1.4469129884137177e-05, | |
| "loss": 0.5505509972572327, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 1.531005418422637, | |
| "grad_norm": 0.7209244965404736, | |
| "learning_rate": 1.4399163668704829e-05, | |
| "loss": 0.5916901230812073, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.5322095123419626, | |
| "grad_norm": 0.6672238207490072, | |
| "learning_rate": 1.4329338559377691e-05, | |
| "loss": 0.5727331042289734, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 1.5334136062612884, | |
| "grad_norm": 0.7282879333042002, | |
| "learning_rate": 1.4259654832911867e-05, | |
| "loss": 0.6152413487434387, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.534617700180614, | |
| "grad_norm": 0.6848054247552551, | |
| "learning_rate": 1.4190112765503117e-05, | |
| "loss": 0.6155499815940857, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 1.53582179409994, | |
| "grad_norm": 0.6779262829361238, | |
| "learning_rate": 1.412071263278571e-05, | |
| "loss": 0.6265017986297607, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.5370258880192655, | |
| "grad_norm": 0.6345807833091854, | |
| "learning_rate": 1.4051454709831308e-05, | |
| "loss": 0.5726587772369385, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 1.5382299819385912, | |
| "grad_norm": 0.7476588469408837, | |
| "learning_rate": 1.398233927114797e-05, | |
| "loss": 0.6458104252815247, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.5394340758579168, | |
| "grad_norm": 1.1233935398982429, | |
| "learning_rate": 1.3913366590678966e-05, | |
| "loss": 0.47828546166419983, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 1.5406381697772427, | |
| "grad_norm": 0.6735195132494955, | |
| "learning_rate": 1.38445369418018e-05, | |
| "loss": 0.6620016694068909, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.5418422636965683, | |
| "grad_norm": 0.7434175109937722, | |
| "learning_rate": 1.377585059732701e-05, | |
| "loss": 0.5846740007400513, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 1.5430463576158941, | |
| "grad_norm": 0.6450732267312386, | |
| "learning_rate": 1.370730782949713e-05, | |
| "loss": 0.547637403011322, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.5442504515352198, | |
| "grad_norm": 0.6742849341522323, | |
| "learning_rate": 1.3638908909985621e-05, | |
| "loss": 0.5787967443466187, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 1.5454545454545454, | |
| "grad_norm": 0.63788147336325, | |
| "learning_rate": 1.3570654109895853e-05, | |
| "loss": 0.5432387590408325, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.546658639373871, | |
| "grad_norm": 0.6879935367171848, | |
| "learning_rate": 1.3502543699759917e-05, | |
| "loss": 0.6357567310333252, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 1.547862733293197, | |
| "grad_norm": 0.6466404497884245, | |
| "learning_rate": 1.3434577949537592e-05, | |
| "loss": 0.5758388638496399, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.5490668272125225, | |
| "grad_norm": 0.7131752950787134, | |
| "learning_rate": 1.336675712861532e-05, | |
| "loss": 0.5336190462112427, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 1.5502709211318484, | |
| "grad_norm": 0.6915472030489775, | |
| "learning_rate": 1.3299081505805088e-05, | |
| "loss": 0.5518936514854431, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.551475015051174, | |
| "grad_norm": 0.7214042959852351, | |
| "learning_rate": 1.3231551349343413e-05, | |
| "loss": 0.6907586455345154, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 1.5526791089704997, | |
| "grad_norm": 0.7321326805875424, | |
| "learning_rate": 1.3164166926890226e-05, | |
| "loss": 0.6275293827056885, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.5538832028898253, | |
| "grad_norm": 0.6243437937781886, | |
| "learning_rate": 1.3096928505527811e-05, | |
| "loss": 0.6157265901565552, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 1.5550872968091511, | |
| "grad_norm": 0.7585166939999731, | |
| "learning_rate": 1.3029836351759795e-05, | |
| "loss": 0.6015262007713318, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.5562913907284768, | |
| "grad_norm": 0.6667612051008648, | |
| "learning_rate": 1.296289073151004e-05, | |
| "loss": 0.5566407442092896, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 1.5574954846478026, | |
| "grad_norm": 0.6470430817221635, | |
| "learning_rate": 1.2896091910121666e-05, | |
| "loss": 0.5622594356536865, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.5586995785671283, | |
| "grad_norm": 0.6804382719854897, | |
| "learning_rate": 1.2829440152355915e-05, | |
| "loss": 0.5803040266036987, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 1.559903672486454, | |
| "grad_norm": 0.6687207305578332, | |
| "learning_rate": 1.2762935722391089e-05, | |
| "loss": 0.59840327501297, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.5611077664057795, | |
| "grad_norm": 0.6603082346009755, | |
| "learning_rate": 1.2696578883821614e-05, | |
| "loss": 0.5307196974754333, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 1.5623118603251054, | |
| "grad_norm": 0.7359481197118585, | |
| "learning_rate": 1.263036989965688e-05, | |
| "loss": 0.6292601823806763, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.563515954244431, | |
| "grad_norm": 0.630384514242762, | |
| "learning_rate": 1.2564309032320315e-05, | |
| "loss": 0.5643035769462585, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 1.5647200481637569, | |
| "grad_norm": 0.6173040021348626, | |
| "learning_rate": 1.2498396543648195e-05, | |
| "loss": 0.5473842620849609, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.5659241420830825, | |
| "grad_norm": 0.6733910286056267, | |
| "learning_rate": 1.2432632694888724e-05, | |
| "loss": 0.5461156964302063, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 1.5671282360024081, | |
| "grad_norm": 0.6894664872560027, | |
| "learning_rate": 1.2367017746700977e-05, | |
| "loss": 0.551993191242218, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.5683323299217338, | |
| "grad_norm": 0.6222609535272838, | |
| "learning_rate": 1.2301551959153813e-05, | |
| "loss": 0.4720235764980316, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 1.5695364238410596, | |
| "grad_norm": 0.6885496364305319, | |
| "learning_rate": 1.223623559172496e-05, | |
| "loss": 0.6039336919784546, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.5707405177603853, | |
| "grad_norm": 0.6287657401109025, | |
| "learning_rate": 1.217106890329981e-05, | |
| "loss": 0.5178000926971436, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 1.5719446116797111, | |
| "grad_norm": 0.7517299077792008, | |
| "learning_rate": 1.2106052152170561e-05, | |
| "loss": 0.6386547684669495, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.5731487055990367, | |
| "grad_norm": 0.6610070710050879, | |
| "learning_rate": 1.204118559603511e-05, | |
| "loss": 0.5045919418334961, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 1.5743527995183624, | |
| "grad_norm": 0.6881944227439661, | |
| "learning_rate": 1.1976469491996028e-05, | |
| "loss": 0.5601974725723267, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.575556893437688, | |
| "grad_norm": 0.6554510558095886, | |
| "learning_rate": 1.1911904096559589e-05, | |
| "loss": 0.5212211012840271, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 1.5767609873570139, | |
| "grad_norm": 0.637358447537357, | |
| "learning_rate": 1.1847489665634709e-05, | |
| "loss": 0.5502785444259644, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.5779650812763397, | |
| "grad_norm": 0.620386240593587, | |
| "learning_rate": 1.1783226454531942e-05, | |
| "loss": 0.5157536864280701, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 1.5791691751956654, | |
| "grad_norm": 0.6585676339813008, | |
| "learning_rate": 1.1719114717962476e-05, | |
| "loss": 0.5165415406227112, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.580373269114991, | |
| "grad_norm": 0.7493791152879317, | |
| "learning_rate": 1.1655154710037153e-05, | |
| "loss": 0.6232336759567261, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 1.5815773630343166, | |
| "grad_norm": 0.6952382170280249, | |
| "learning_rate": 1.1591346684265359e-05, | |
| "loss": 0.6074533462524414, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.5827814569536423, | |
| "grad_norm": 0.6890649127414126, | |
| "learning_rate": 1.1527690893554156e-05, | |
| "loss": 0.54258131980896, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 1.583985550872968, | |
| "grad_norm": 0.7723371841016708, | |
| "learning_rate": 1.1464187590207171e-05, | |
| "loss": 0.5698084235191345, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.585189644792294, | |
| "grad_norm": 0.6803846972242944, | |
| "learning_rate": 1.1400837025923649e-05, | |
| "loss": 0.5348570346832275, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 1.5863937387116196, | |
| "grad_norm": 0.7012133606971119, | |
| "learning_rate": 1.1337639451797494e-05, | |
| "loss": 0.5426389575004578, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.5875978326309452, | |
| "grad_norm": 0.6744539282883935, | |
| "learning_rate": 1.1274595118316134e-05, | |
| "loss": 0.5523807406425476, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 1.5888019265502709, | |
| "grad_norm": 0.6537647651178288, | |
| "learning_rate": 1.1211704275359675e-05, | |
| "loss": 0.6136876344680786, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.5900060204695965, | |
| "grad_norm": 0.7246448682395509, | |
| "learning_rate": 1.1148967172199848e-05, | |
| "loss": 0.57940274477005, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 1.5912101143889223, | |
| "grad_norm": 0.7262129784220396, | |
| "learning_rate": 1.1086384057499011e-05, | |
| "loss": 0.6009513139724731, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.5924142083082482, | |
| "grad_norm": 0.6920385306362808, | |
| "learning_rate": 1.1023955179309203e-05, | |
| "loss": 0.630897045135498, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 1.5936183022275738, | |
| "grad_norm": 0.7137604613022566, | |
| "learning_rate": 1.0961680785071116e-05, | |
| "loss": 0.5550639629364014, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.5948223961468995, | |
| "grad_norm": 0.7085346864243411, | |
| "learning_rate": 1.0899561121613145e-05, | |
| "loss": 0.5286394357681274, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 1.596026490066225, | |
| "grad_norm": 0.8302518321935719, | |
| "learning_rate": 1.0837596435150398e-05, | |
| "loss": 0.5946967601776123, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.5972305839855507, | |
| "grad_norm": 0.6668203019912043, | |
| "learning_rate": 1.0775786971283725e-05, | |
| "loss": 0.5410081148147583, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 1.5984346779048766, | |
| "grad_norm": 0.7107307013054678, | |
| "learning_rate": 1.0714132974998731e-05, | |
| "loss": 0.6099376678466797, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.5996387718242024, | |
| "grad_norm": 0.692321863693783, | |
| "learning_rate": 1.065263469066483e-05, | |
| "loss": 0.5954639315605164, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 1.600842865743528, | |
| "grad_norm": 0.670559078654551, | |
| "learning_rate": 1.0591292362034255e-05, | |
| "loss": 0.6572100520133972, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.6020469596628537, | |
| "grad_norm": 0.7359764923754903, | |
| "learning_rate": 1.0530106232241099e-05, | |
| "loss": 0.528759777545929, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 1.6032510535821793, | |
| "grad_norm": 0.7315406198530844, | |
| "learning_rate": 1.0469076543800355e-05, | |
| "loss": 0.6525534391403198, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.604455147501505, | |
| "grad_norm": 0.6550323597585707, | |
| "learning_rate": 1.0408203538606948e-05, | |
| "loss": 0.5543904304504395, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 1.6056592414208308, | |
| "grad_norm": 0.7877187830505626, | |
| "learning_rate": 1.0347487457934768e-05, | |
| "loss": 0.6900970935821533, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.6068633353401567, | |
| "grad_norm": 0.7022130623932448, | |
| "learning_rate": 1.0286928542435754e-05, | |
| "loss": 0.5940430164337158, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 1.6080674292594823, | |
| "grad_norm": 0.7548095541194471, | |
| "learning_rate": 1.0226527032138878e-05, | |
| "loss": 0.6002829074859619, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.609271523178808, | |
| "grad_norm": 0.675148279495715, | |
| "learning_rate": 1.0166283166449253e-05, | |
| "loss": 0.6131722331047058, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 1.6104756170981336, | |
| "grad_norm": 0.7157880014315426, | |
| "learning_rate": 1.0106197184147143e-05, | |
| "loss": 0.4769801199436188, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.6116797110174592, | |
| "grad_norm": 0.7070675214972796, | |
| "learning_rate": 1.0046269323387036e-05, | |
| "loss": 0.5769504308700562, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 1.612883804936785, | |
| "grad_norm": 0.676711262352062, | |
| "learning_rate": 9.986499821696693e-06, | |
| "loss": 0.5734344720840454, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.614087898856111, | |
| "grad_norm": 0.6195920357404767, | |
| "learning_rate": 9.926888915976219e-06, | |
| "loss": 0.505179762840271, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 1.6152919927754366, | |
| "grad_norm": 0.639936444761267, | |
| "learning_rate": 9.867436842497103e-06, | |
| "loss": 0.45891082286834717, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.6164960866947622, | |
| "grad_norm": 0.6857643487164867, | |
| "learning_rate": 9.808143836901284e-06, | |
| "loss": 0.6181744933128357, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 1.6177001806140878, | |
| "grad_norm": 0.6358596598261053, | |
| "learning_rate": 9.74901013420026e-06, | |
| "loss": 0.5622484087944031, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.6189042745334135, | |
| "grad_norm": 0.6306024458716856, | |
| "learning_rate": 9.69003596877408e-06, | |
| "loss": 0.5141526460647583, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 1.6201083684527393, | |
| "grad_norm": 0.7029423890190829, | |
| "learning_rate": 9.631221574370491e-06, | |
| "loss": 0.6316851377487183, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.6213124623720652, | |
| "grad_norm": 0.6940021087538228, | |
| "learning_rate": 9.572567184103958e-06, | |
| "loss": 0.6254806518554688, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 1.6225165562913908, | |
| "grad_norm": 0.6404210309720665, | |
| "learning_rate": 9.514073030454762e-06, | |
| "loss": 0.5518223643302917, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.6237206502107164, | |
| "grad_norm": 0.6804784668574658, | |
| "learning_rate": 9.455739345268088e-06, | |
| "loss": 0.6273688673973083, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 1.624924744130042, | |
| "grad_norm": 0.6862326525850346, | |
| "learning_rate": 9.39756635975308e-06, | |
| "loss": 0.545397937297821, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.6261288380493677, | |
| "grad_norm": 0.6998552571990925, | |
| "learning_rate": 9.339554304481951e-06, | |
| "loss": 0.6098823547363281, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 1.6273329319686936, | |
| "grad_norm": 0.6602839846160563, | |
| "learning_rate": 9.281703409389043e-06, | |
| "loss": 0.5640820264816284, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.6285370258880194, | |
| "grad_norm": 0.6404985903308311, | |
| "learning_rate": 9.224013903769946e-06, | |
| "loss": 0.5460361838340759, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 1.629741119807345, | |
| "grad_norm": 0.7310242255012503, | |
| "learning_rate": 9.166486016280562e-06, | |
| "loss": 0.6130272150039673, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.6309452137266707, | |
| "grad_norm": 0.6884079103542232, | |
| "learning_rate": 9.109119974936214e-06, | |
| "loss": 0.6136751174926758, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 1.6321493076459963, | |
| "grad_norm": 0.6529933620631292, | |
| "learning_rate": 9.051916007110734e-06, | |
| "loss": 0.6048874855041504, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.633353401565322, | |
| "grad_norm": 0.6395968359912824, | |
| "learning_rate": 8.994874339535569e-06, | |
| "loss": 0.5344066023826599, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 1.6345574954846478, | |
| "grad_norm": 0.6918279947344644, | |
| "learning_rate": 8.937995198298876e-06, | |
| "loss": 0.5948113799095154, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.6357615894039736, | |
| "grad_norm": 0.749627446048807, | |
| "learning_rate": 8.881278808844634e-06, | |
| "loss": 0.7015585899353027, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 1.6369656833232993, | |
| "grad_norm": 0.6513184099746449, | |
| "learning_rate": 8.824725395971745e-06, | |
| "loss": 0.5625496506690979, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.638169777242625, | |
| "grad_norm": 0.70238524053711, | |
| "learning_rate": 8.768335183833132e-06, | |
| "loss": 0.5731056928634644, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 1.6393738711619505, | |
| "grad_norm": 0.5922414879341457, | |
| "learning_rate": 8.712108395934881e-06, | |
| "loss": 0.5841649174690247, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.6405779650812762, | |
| "grad_norm": 0.7120993016869709, | |
| "learning_rate": 8.656045255135314e-06, | |
| "loss": 0.6364700794219971, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 1.641782059000602, | |
| "grad_norm": 0.6823205060765781, | |
| "learning_rate": 8.600145983644148e-06, | |
| "loss": 0.5725845694541931, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.642986152919928, | |
| "grad_norm": 0.7198857587229472, | |
| "learning_rate": 8.544410803021574e-06, | |
| "loss": 0.6214855909347534, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 1.6441902468392535, | |
| "grad_norm": 0.7599273701755161, | |
| "learning_rate": 8.488839934177422e-06, | |
| "loss": 0.5974388122558594, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.6453943407585792, | |
| "grad_norm": 0.6299613926355601, | |
| "learning_rate": 8.43343359737025e-06, | |
| "loss": 0.541568398475647, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 1.6465984346779048, | |
| "grad_norm": 0.7683368730630007, | |
| "learning_rate": 8.37819201220647e-06, | |
| "loss": 0.5598741173744202, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.6478025285972304, | |
| "grad_norm": 0.6726052056551877, | |
| "learning_rate": 8.323115397639513e-06, | |
| "loss": 0.6542848348617554, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 1.6490066225165563, | |
| "grad_norm": 0.7069030163623019, | |
| "learning_rate": 8.268203971968924e-06, | |
| "loss": 0.6667160987854004, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.6502107164358821, | |
| "grad_norm": 0.7887578536010386, | |
| "learning_rate": 8.213457952839527e-06, | |
| "loss": 0.6406658887863159, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 1.6514148103552078, | |
| "grad_norm": 0.6428228995877527, | |
| "learning_rate": 8.158877557240529e-06, | |
| "loss": 0.5842369794845581, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.6526189042745334, | |
| "grad_norm": 0.6479207628949376, | |
| "learning_rate": 8.104463001504697e-06, | |
| "loss": 0.6519626379013062, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 1.653822998193859, | |
| "grad_norm": 0.646271984786154, | |
| "learning_rate": 8.05021450130744e-06, | |
| "loss": 0.672814667224884, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.6550270921131849, | |
| "grad_norm": 0.690089646787334, | |
| "learning_rate": 7.996132271666062e-06, | |
| "loss": 0.6398512125015259, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 1.6562311860325105, | |
| "grad_norm": 0.7142843238137437, | |
| "learning_rate": 7.942216526938795e-06, | |
| "loss": 0.6502315998077393, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.6574352799518364, | |
| "grad_norm": 0.7017333242505988, | |
| "learning_rate": 7.888467480824003e-06, | |
| "loss": 0.5540474653244019, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 1.658639373871162, | |
| "grad_norm": 0.6112300966080016, | |
| "learning_rate": 7.83488534635936e-06, | |
| "loss": 0.5615662336349487, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.6598434677904876, | |
| "grad_norm": 0.6239463125390158, | |
| "learning_rate": 7.78147033592091e-06, | |
| "loss": 0.5031113028526306, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 1.6610475617098133, | |
| "grad_norm": 0.6256029475727574, | |
| "learning_rate": 7.728222661222372e-06, | |
| "loss": 0.5563163161277771, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.6622516556291391, | |
| "grad_norm": 0.6174721327715081, | |
| "learning_rate": 7.675142533314172e-06, | |
| "loss": 0.5445525050163269, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 1.6634557495484648, | |
| "grad_norm": 0.7024225984065051, | |
| "learning_rate": 7.622230162582656e-06, | |
| "loss": 0.645501971244812, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.6646598434677906, | |
| "grad_norm": 0.669124661367454, | |
| "learning_rate": 7.569485758749262e-06, | |
| "loss": 0.591521680355072, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 1.6658639373871162, | |
| "grad_norm": 0.6527263767067266, | |
| "learning_rate": 7.5169095308696865e-06, | |
| "loss": 0.5606772899627686, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.6670680313064419, | |
| "grad_norm": 0.7128844407615076, | |
| "learning_rate": 7.4645016873330366e-06, | |
| "loss": 0.5577992796897888, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 1.6682721252257675, | |
| "grad_norm": 0.7093763173916899, | |
| "learning_rate": 7.412262435861023e-06, | |
| "loss": 0.6242444515228271, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.6694762191450934, | |
| "grad_norm": 0.636491032457578, | |
| "learning_rate": 7.360191983507153e-06, | |
| "loss": 0.6403906345367432, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 1.670680313064419, | |
| "grad_norm": 0.6790989386676121, | |
| "learning_rate": 7.308290536655832e-06, | |
| "loss": 0.5881612300872803, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 1.6718844069837449, | |
| "grad_norm": 0.6914623079540592, | |
| "learning_rate": 7.256558301021665e-06, | |
| "loss": 0.6011528968811035, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 1.6730885009030705, | |
| "grad_norm": 0.6449236469256695, | |
| "learning_rate": 7.2049954816485465e-06, | |
| "loss": 0.5145377516746521, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.6742925948223961, | |
| "grad_norm": 0.6402647292844768, | |
| "learning_rate": 7.153602282908877e-06, | |
| "loss": 0.46896347403526306, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 1.6754966887417218, | |
| "grad_norm": 0.9044826757800251, | |
| "learning_rate": 7.102378908502783e-06, | |
| "loss": 0.5730105638504028, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 1.6767007826610476, | |
| "grad_norm": 0.6755349959326966, | |
| "learning_rate": 7.051325561457217e-06, | |
| "loss": 0.49916258454322815, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 1.6779048765803732, | |
| "grad_norm": 0.6081660544641575, | |
| "learning_rate": 7.000442444125299e-06, | |
| "loss": 0.581791877746582, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 1.679108970499699, | |
| "grad_norm": 0.6568587395069151, | |
| "learning_rate": 6.949729758185386e-06, | |
| "loss": 0.5793961882591248, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 1.6803130644190247, | |
| "grad_norm": 0.6045398063594519, | |
| "learning_rate": 6.899187704640325e-06, | |
| "loss": 0.5350834131240845, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 1.6815171583383504, | |
| "grad_norm": 0.703670828010958, | |
| "learning_rate": 6.848816483816672e-06, | |
| "loss": 0.6520372629165649, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 1.682721252257676, | |
| "grad_norm": 0.6930075174428402, | |
| "learning_rate": 6.798616295363819e-06, | |
| "loss": 0.6430896520614624, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 1.6839253461770018, | |
| "grad_norm": 0.7730269428876103, | |
| "learning_rate": 6.748587338253337e-06, | |
| "loss": 0.6163848638534546, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 1.6851294400963275, | |
| "grad_norm": 0.6286066574174697, | |
| "learning_rate": 6.698729810778065e-06, | |
| "loss": 0.5772340297698975, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.6863335340156533, | |
| "grad_norm": 0.7294910037496207, | |
| "learning_rate": 6.649043910551394e-06, | |
| "loss": 0.5512592792510986, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 1.687537627934979, | |
| "grad_norm": 0.6244222286619725, | |
| "learning_rate": 6.59952983450643e-06, | |
| "loss": 0.6313691139221191, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 1.6887417218543046, | |
| "grad_norm": 0.6734978231377051, | |
| "learning_rate": 6.550187778895245e-06, | |
| "loss": 0.5909968614578247, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 1.6899458157736302, | |
| "grad_norm": 0.6618866839532577, | |
| "learning_rate": 6.501017939288145e-06, | |
| "loss": 0.5310465693473816, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 1.691149909692956, | |
| "grad_norm": 0.8607611105372066, | |
| "learning_rate": 6.452020510572798e-06, | |
| "loss": 0.6151556968688965, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 1.6923540036122817, | |
| "grad_norm": 0.7005150491998585, | |
| "learning_rate": 6.4031956869535446e-06, | |
| "loss": 0.5600522756576538, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 1.6935580975316076, | |
| "grad_norm": 0.6913763418991744, | |
| "learning_rate": 6.354543661950552e-06, | |
| "loss": 0.6204140186309814, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 1.6947621914509332, | |
| "grad_norm": 0.6717318012350875, | |
| "learning_rate": 6.3060646283991106e-06, | |
| "loss": 0.5715824365615845, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 1.6959662853702588, | |
| "grad_norm": 0.6804374521308102, | |
| "learning_rate": 6.257758778448886e-06, | |
| "loss": 0.6015537977218628, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 1.6971703792895845, | |
| "grad_norm": 0.7197309714970597, | |
| "learning_rate": 6.20962630356306e-06, | |
| "loss": 0.6230743527412415, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.6983744732089103, | |
| "grad_norm": 0.6084567912170616, | |
| "learning_rate": 6.1616673945176836e-06, | |
| "loss": 0.5166609287261963, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 1.699578567128236, | |
| "grad_norm": 0.7166780063988029, | |
| "learning_rate": 6.113882241400804e-06, | |
| "loss": 0.5911763310432434, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 1.7007826610475618, | |
| "grad_norm": 0.680644684252157, | |
| "learning_rate": 6.0662710336118425e-06, | |
| "loss": 0.5495004653930664, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 1.7019867549668874, | |
| "grad_norm": 0.6541568159495065, | |
| "learning_rate": 6.018833959860753e-06, | |
| "loss": 0.5092551708221436, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 1.703190848886213, | |
| "grad_norm": 0.7016217840045879, | |
| "learning_rate": 5.971571208167298e-06, | |
| "loss": 0.5613583326339722, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 1.7043949428055387, | |
| "grad_norm": 0.6856085040176679, | |
| "learning_rate": 5.9244829658602845e-06, | |
| "loss": 0.5862411260604858, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 1.7055990367248646, | |
| "grad_norm": 0.6591642311688184, | |
| "learning_rate": 5.87756941957685e-06, | |
| "loss": 0.5513682961463928, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 1.7068031306441902, | |
| "grad_norm": 0.7349519639802647, | |
| "learning_rate": 5.830830755261751e-06, | |
| "loss": 0.6334446668624878, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 1.708007224563516, | |
| "grad_norm": 0.73882863177204, | |
| "learning_rate": 5.784267158166551e-06, | |
| "loss": 0.6603875160217285, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 1.7092113184828417, | |
| "grad_norm": 0.7095238297236984, | |
| "learning_rate": 5.737878812848929e-06, | |
| "loss": 0.6795850396156311, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.7104154124021673, | |
| "grad_norm": 0.6563617435177955, | |
| "learning_rate": 5.691665903171939e-06, | |
| "loss": 0.5577661991119385, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 1.711619506321493, | |
| "grad_norm": 0.6926805845112587, | |
| "learning_rate": 5.6456286123032884e-06, | |
| "loss": 0.5532077550888062, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.7128236002408188, | |
| "grad_norm": 0.757840187820166, | |
| "learning_rate": 5.599767122714627e-06, | |
| "loss": 0.6900238394737244, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 1.7140276941601444, | |
| "grad_norm": 0.6736091606976441, | |
| "learning_rate": 5.554081616180801e-06, | |
| "loss": 0.671580970287323, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 1.7152317880794703, | |
| "grad_norm": 0.8110605807535137, | |
| "learning_rate": 5.508572273779089e-06, | |
| "loss": 0.6170952916145325, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 1.716435881998796, | |
| "grad_norm": 0.801663317237608, | |
| "learning_rate": 5.4632392758885985e-06, | |
| "loss": 0.6166957020759583, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 1.7176399759181216, | |
| "grad_norm": 0.7132870889967348, | |
| "learning_rate": 5.418082802189434e-06, | |
| "loss": 0.6364924907684326, | |
| "step": 1427 | |
| }, | |
| { | |
| "epoch": 1.7188440698374472, | |
| "grad_norm": 0.6382040908202296, | |
| "learning_rate": 5.37310303166208e-06, | |
| "loss": 0.5974953174591064, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 1.720048163756773, | |
| "grad_norm": 0.6293311603232586, | |
| "learning_rate": 5.328300142586629e-06, | |
| "loss": 0.5622908473014832, | |
| "step": 1429 | |
| }, | |
| { | |
| "epoch": 1.7212522576760987, | |
| "grad_norm": 0.7087979265244937, | |
| "learning_rate": 5.283674312542064e-06, | |
| "loss": 0.6232190132141113, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.7224563515954245, | |
| "grad_norm": 0.6819379024095552, | |
| "learning_rate": 5.239225718405616e-06, | |
| "loss": 0.5627528429031372, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 1.7236604455147502, | |
| "grad_norm": 0.6636651792900646, | |
| "learning_rate": 5.194954536352021e-06, | |
| "loss": 0.5178531408309937, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 1.7248645394340758, | |
| "grad_norm": 0.6221452686560454, | |
| "learning_rate": 5.150860941852842e-06, | |
| "loss": 0.5021407604217529, | |
| "step": 1433 | |
| }, | |
| { | |
| "epoch": 1.7260686333534014, | |
| "grad_norm": 0.7076986193166933, | |
| "learning_rate": 5.106945109675759e-06, | |
| "loss": 0.5684460997581482, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 1.7272727272727273, | |
| "grad_norm": 0.7586911684097273, | |
| "learning_rate": 5.0632072138838584e-06, | |
| "loss": 0.622063159942627, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 1.728476821192053, | |
| "grad_norm": 0.6432313034645677, | |
| "learning_rate": 5.019647427834978e-06, | |
| "loss": 0.5491294860839844, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 1.7296809151113788, | |
| "grad_norm": 0.7126328051936873, | |
| "learning_rate": 4.9762659241810285e-06, | |
| "loss": 0.6418676376342773, | |
| "step": 1437 | |
| }, | |
| { | |
| "epoch": 1.7308850090307044, | |
| "grad_norm": 0.7245289716767404, | |
| "learning_rate": 4.933062874867267e-06, | |
| "loss": 0.6219337582588196, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 1.73208910295003, | |
| "grad_norm": 0.6652968238429772, | |
| "learning_rate": 4.8900384511316234e-06, | |
| "loss": 0.5271619558334351, | |
| "step": 1439 | |
| }, | |
| { | |
| "epoch": 1.7332931968693557, | |
| "grad_norm": 0.6528164827219402, | |
| "learning_rate": 4.847192823504054e-06, | |
| "loss": 0.5417010188102722, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.7344972907886815, | |
| "grad_norm": 0.6721322436180379, | |
| "learning_rate": 4.804526161805833e-06, | |
| "loss": 0.4796888530254364, | |
| "step": 1441 | |
| }, | |
| { | |
| "epoch": 1.7357013847080074, | |
| "grad_norm": 0.6753296354279802, | |
| "learning_rate": 4.7620386351489185e-06, | |
| "loss": 0.5415114164352417, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 1.736905478627333, | |
| "grad_norm": 0.8239056192797215, | |
| "learning_rate": 4.7197304119352355e-06, | |
| "loss": 0.6290271282196045, | |
| "step": 1443 | |
| }, | |
| { | |
| "epoch": 1.7381095725466587, | |
| "grad_norm": 0.7452282782525684, | |
| "learning_rate": 4.6776016598560124e-06, | |
| "loss": 0.5112418532371521, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 1.7393136664659843, | |
| "grad_norm": 0.8020314546802029, | |
| "learning_rate": 4.635652545891156e-06, | |
| "loss": 0.5837382674217224, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 1.74051776038531, | |
| "grad_norm": 0.6592594691489769, | |
| "learning_rate": 4.5938832363085495e-06, | |
| "loss": 0.5531149506568909, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 1.7417218543046358, | |
| "grad_norm": 0.7155783650823891, | |
| "learning_rate": 4.552293896663451e-06, | |
| "loss": 0.5711277723312378, | |
| "step": 1447 | |
| }, | |
| { | |
| "epoch": 1.7429259482239616, | |
| "grad_norm": 0.7090330366794828, | |
| "learning_rate": 4.510884691797751e-06, | |
| "loss": 0.6422768831253052, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 1.7441300421432873, | |
| "grad_norm": 0.6767776010565582, | |
| "learning_rate": 4.469655785839377e-06, | |
| "loss": 0.5116891860961914, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 1.745334136062613, | |
| "grad_norm": 0.7374313697530425, | |
| "learning_rate": 4.428607342201635e-06, | |
| "loss": 0.6454214453697205, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.7465382299819385, | |
| "grad_norm": 0.7319555301101304, | |
| "learning_rate": 4.387739523582551e-06, | |
| "loss": 0.6378218531608582, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 1.7477423239012642, | |
| "grad_norm": 0.6048272822670632, | |
| "learning_rate": 4.347052491964265e-06, | |
| "loss": 0.5626097321510315, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 1.74894641782059, | |
| "grad_norm": 0.6527263143361477, | |
| "learning_rate": 4.306546408612306e-06, | |
| "loss": 0.5640212297439575, | |
| "step": 1453 | |
| }, | |
| { | |
| "epoch": 1.7501505117399159, | |
| "grad_norm": 0.6880645867716209, | |
| "learning_rate": 4.26622143407503e-06, | |
| "loss": 0.5336100459098816, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 1.7513546056592415, | |
| "grad_norm": 0.699862911543868, | |
| "learning_rate": 4.226077728182959e-06, | |
| "loss": 0.5081793069839478, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 1.7525586995785671, | |
| "grad_norm": 0.7137368223623376, | |
| "learning_rate": 4.186115450048128e-06, | |
| "loss": 0.5804829001426697, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 1.7537627934978928, | |
| "grad_norm": 0.7289237634290167, | |
| "learning_rate": 4.1463347580635195e-06, | |
| "loss": 0.5615112781524658, | |
| "step": 1457 | |
| }, | |
| { | |
| "epoch": 1.7549668874172184, | |
| "grad_norm": 0.7339170280419827, | |
| "learning_rate": 4.106735809902324e-06, | |
| "loss": 0.5764937996864319, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 1.7561709813365443, | |
| "grad_norm": 0.7654362997491899, | |
| "learning_rate": 4.0673187625174195e-06, | |
| "loss": 0.5667378902435303, | |
| "step": 1459 | |
| }, | |
| { | |
| "epoch": 1.7573750752558701, | |
| "grad_norm": 0.7182102826832707, | |
| "learning_rate": 4.028083772140689e-06, | |
| "loss": 0.5884034633636475, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.7585791691751957, | |
| "grad_norm": 0.6300590195339411, | |
| "learning_rate": 3.989030994282434e-06, | |
| "loss": 0.5312051773071289, | |
| "step": 1461 | |
| }, | |
| { | |
| "epoch": 1.7597832630945214, | |
| "grad_norm": 0.6928354046394739, | |
| "learning_rate": 3.950160583730761e-06, | |
| "loss": 0.606346607208252, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 1.760987357013847, | |
| "grad_norm": 0.6289904720451077, | |
| "learning_rate": 3.911472694550916e-06, | |
| "loss": 0.48723918199539185, | |
| "step": 1463 | |
| }, | |
| { | |
| "epoch": 1.7621914509331726, | |
| "grad_norm": 0.6938945943340636, | |
| "learning_rate": 3.872967480084727e-06, | |
| "loss": 0.594094455242157, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 1.7633955448524985, | |
| "grad_norm": 0.6454574999652682, | |
| "learning_rate": 3.834645092949973e-06, | |
| "loss": 0.5265861749649048, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 1.7645996387718244, | |
| "grad_norm": 0.6402068002340231, | |
| "learning_rate": 3.796505685039825e-06, | |
| "loss": 0.5821572542190552, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 1.76580373269115, | |
| "grad_norm": 0.6215539747644288, | |
| "learning_rate": 3.758549407522144e-06, | |
| "loss": 0.5699132680892944, | |
| "step": 1467 | |
| }, | |
| { | |
| "epoch": 1.7670078266104756, | |
| "grad_norm": 0.6494984773034171, | |
| "learning_rate": 3.720776410838983e-06, | |
| "loss": 0.4552852511405945, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 1.7682119205298013, | |
| "grad_norm": 0.7818456595599201, | |
| "learning_rate": 3.6831868447059324e-06, | |
| "loss": 0.5594058632850647, | |
| "step": 1469 | |
| }, | |
| { | |
| "epoch": 1.7694160144491269, | |
| "grad_norm": 0.7190550070540783, | |
| "learning_rate": 3.645780858111547e-06, | |
| "loss": 0.5849020481109619, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.7706201083684527, | |
| "grad_norm": 0.7249951662594688, | |
| "learning_rate": 3.6085585993167805e-06, | |
| "loss": 0.6334759593009949, | |
| "step": 1471 | |
| }, | |
| { | |
| "epoch": 1.7718242022877786, | |
| "grad_norm": 0.67842444805327, | |
| "learning_rate": 3.5715202158543125e-06, | |
| "loss": 0.5594388246536255, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 1.7730282962071042, | |
| "grad_norm": 0.8264073869124916, | |
| "learning_rate": 3.5346658545280795e-06, | |
| "loss": 0.6197201013565063, | |
| "step": 1473 | |
| }, | |
| { | |
| "epoch": 1.7742323901264299, | |
| "grad_norm": 0.6698909050253031, | |
| "learning_rate": 3.4979956614125953e-06, | |
| "loss": 0.5784468650817871, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 1.7754364840457555, | |
| "grad_norm": 0.7016937116133866, | |
| "learning_rate": 3.4615097818524235e-06, | |
| "loss": 0.5452237725257874, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 1.7766405779650811, | |
| "grad_norm": 0.7086957002805386, | |
| "learning_rate": 3.4252083604616182e-06, | |
| "loss": 0.5750047564506531, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 1.777844671884407, | |
| "grad_norm": 0.7512169881958669, | |
| "learning_rate": 3.389091541123074e-06, | |
| "loss": 0.6154974699020386, | |
| "step": 1477 | |
| }, | |
| { | |
| "epoch": 1.7790487658037328, | |
| "grad_norm": 0.7042299855461226, | |
| "learning_rate": 3.353159466988032e-06, | |
| "loss": 0.5605722665786743, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 1.7802528597230585, | |
| "grad_norm": 0.630558437887846, | |
| "learning_rate": 3.3174122804754738e-06, | |
| "loss": 0.5536834001541138, | |
| "step": 1479 | |
| }, | |
| { | |
| "epoch": 1.781456953642384, | |
| "grad_norm": 0.6794569037867353, | |
| "learning_rate": 3.2818501232715794e-06, | |
| "loss": 0.5650051236152649, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.7826610475617097, | |
| "grad_norm": 0.7591191616699784, | |
| "learning_rate": 3.246473136329148e-06, | |
| "loss": 0.51902836561203, | |
| "step": 1481 | |
| }, | |
| { | |
| "epoch": 1.7838651414810354, | |
| "grad_norm": 0.707909160129663, | |
| "learning_rate": 3.211281459867038e-06, | |
| "loss": 0.5955489873886108, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 1.7850692354003612, | |
| "grad_norm": 0.703632152706884, | |
| "learning_rate": 3.1762752333696297e-06, | |
| "loss": 0.5627739429473877, | |
| "step": 1483 | |
| }, | |
| { | |
| "epoch": 1.786273329319687, | |
| "grad_norm": 0.7499834212374759, | |
| "learning_rate": 3.141454595586252e-06, | |
| "loss": 0.5941311717033386, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 1.7874774232390127, | |
| "grad_norm": 0.7497386383473793, | |
| "learning_rate": 3.1068196845306487e-06, | |
| "loss": 0.6763837933540344, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 1.7886815171583383, | |
| "grad_norm": 0.6661680797779713, | |
| "learning_rate": 3.072370637480415e-06, | |
| "loss": 0.5017702579498291, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 1.789885611077664, | |
| "grad_norm": 0.6976050548874814, | |
| "learning_rate": 3.0381075909764744e-06, | |
| "loss": 0.5178176164627075, | |
| "step": 1487 | |
| }, | |
| { | |
| "epoch": 1.7910897049969896, | |
| "grad_norm": 0.7283625311783152, | |
| "learning_rate": 3.004030680822517e-06, | |
| "loss": 0.6162016987800598, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 1.7922937989163155, | |
| "grad_norm": 0.6842189977654847, | |
| "learning_rate": 2.9701400420844737e-06, | |
| "loss": 0.6359889507293701, | |
| "step": 1489 | |
| }, | |
| { | |
| "epoch": 1.7934978928356413, | |
| "grad_norm": 0.6757745138342321, | |
| "learning_rate": 2.9364358090899766e-06, | |
| "loss": 0.6323922276496887, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.794701986754967, | |
| "grad_norm": 0.6901477256781282, | |
| "learning_rate": 2.9029181154278274e-06, | |
| "loss": 0.6050222516059875, | |
| "step": 1491 | |
| }, | |
| { | |
| "epoch": 1.7959060806742926, | |
| "grad_norm": 0.7171202358809439, | |
| "learning_rate": 2.8695870939474624e-06, | |
| "loss": 0.5529733896255493, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 1.7971101745936182, | |
| "grad_norm": 0.6669003093248148, | |
| "learning_rate": 2.836442876758438e-06, | |
| "loss": 0.5076313018798828, | |
| "step": 1493 | |
| }, | |
| { | |
| "epoch": 1.7983142685129438, | |
| "grad_norm": 0.7269113877004387, | |
| "learning_rate": 2.8034855952299045e-06, | |
| "loss": 0.5276248455047607, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 1.7995183624322697, | |
| "grad_norm": 0.723355977685466, | |
| "learning_rate": 2.770715379990069e-06, | |
| "loss": 0.6064050793647766, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 1.8007224563515956, | |
| "grad_norm": 0.8243187220777127, | |
| "learning_rate": 2.7381323609256937e-06, | |
| "loss": 0.5484981536865234, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 1.8019265502709212, | |
| "grad_norm": 0.7039071786277653, | |
| "learning_rate": 2.7057366671815856e-06, | |
| "loss": 0.5383074283599854, | |
| "step": 1497 | |
| }, | |
| { | |
| "epoch": 1.8031306441902468, | |
| "grad_norm": 0.6667856491817752, | |
| "learning_rate": 2.6735284271600657e-06, | |
| "loss": 0.547199547290802, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 1.8043347381095725, | |
| "grad_norm": 0.6492279453493423, | |
| "learning_rate": 2.641507768520479e-06, | |
| "loss": 0.5876493453979492, | |
| "step": 1499 | |
| }, | |
| { | |
| "epoch": 1.805538832028898, | |
| "grad_norm": 0.6305483892926103, | |
| "learning_rate": 2.6096748181786758e-06, | |
| "loss": 0.4917656183242798, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.806742925948224, | |
| "grad_norm": 0.8067515837423501, | |
| "learning_rate": 2.5780297023065057e-06, | |
| "loss": 0.6041967272758484, | |
| "step": 1501 | |
| }, | |
| { | |
| "epoch": 1.8079470198675498, | |
| "grad_norm": 0.6999590307341557, | |
| "learning_rate": 2.546572546331338e-06, | |
| "loss": 0.6243309378623962, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 1.8091511137868754, | |
| "grad_norm": 0.6758862291871842, | |
| "learning_rate": 2.5153034749355487e-06, | |
| "loss": 0.5620877742767334, | |
| "step": 1503 | |
| }, | |
| { | |
| "epoch": 1.810355207706201, | |
| "grad_norm": 0.7545413968501355, | |
| "learning_rate": 2.4842226120560255e-06, | |
| "loss": 0.6363770365715027, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 1.8115593016255267, | |
| "grad_norm": 0.6875005723476368, | |
| "learning_rate": 2.4533300808836757e-06, | |
| "loss": 0.6246600151062012, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 1.8127633955448526, | |
| "grad_norm": 0.6838453782037119, | |
| "learning_rate": 2.4226260038629545e-06, | |
| "loss": 0.5130318999290466, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 1.8139674894641782, | |
| "grad_norm": 0.6661804546469481, | |
| "learning_rate": 2.3921105026913527e-06, | |
| "loss": 0.5225271582603455, | |
| "step": 1507 | |
| }, | |
| { | |
| "epoch": 1.815171583383504, | |
| "grad_norm": 0.7559286846781317, | |
| "learning_rate": 2.3617836983189366e-06, | |
| "loss": 0.5840597152709961, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 1.8163756773028297, | |
| "grad_norm": 0.7015898859383557, | |
| "learning_rate": 2.3316457109478716e-06, | |
| "loss": 0.576333224773407, | |
| "step": 1509 | |
| }, | |
| { | |
| "epoch": 1.8175797712221553, | |
| "grad_norm": 0.7325334460929459, | |
| "learning_rate": 2.3016966600319154e-06, | |
| "loss": 0.6196116805076599, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.818783865141481, | |
| "grad_norm": 0.7238971994565907, | |
| "learning_rate": 2.2719366642759754e-06, | |
| "loss": 0.6115279197692871, | |
| "step": 1511 | |
| }, | |
| { | |
| "epoch": 1.8199879590608068, | |
| "grad_norm": 0.6615840220669031, | |
| "learning_rate": 2.2423658416356296e-06, | |
| "loss": 0.5739059448242188, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 1.8211920529801324, | |
| "grad_norm": 0.6438818574320542, | |
| "learning_rate": 2.212984309316646e-06, | |
| "loss": 0.5544348955154419, | |
| "step": 1513 | |
| }, | |
| { | |
| "epoch": 1.8223961468994583, | |
| "grad_norm": 0.6887199478226267, | |
| "learning_rate": 2.183792183774541e-06, | |
| "loss": 0.5506141781806946, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 1.823600240818784, | |
| "grad_norm": 0.630566851328714, | |
| "learning_rate": 2.1547895807141004e-06, | |
| "loss": 0.5032554864883423, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 1.8248043347381095, | |
| "grad_norm": 0.6613185082560529, | |
| "learning_rate": 2.125976615088926e-06, | |
| "loss": 0.6159868240356445, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 1.8260084286574352, | |
| "grad_norm": 0.7007602126683692, | |
| "learning_rate": 2.0973534011009823e-06, | |
| "loss": 0.5271652936935425, | |
| "step": 1517 | |
| }, | |
| { | |
| "epoch": 1.827212522576761, | |
| "grad_norm": 0.6991109886060091, | |
| "learning_rate": 2.0689200522001294e-06, | |
| "loss": 0.5943084955215454, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 1.8284166164960867, | |
| "grad_norm": 0.6317388395635505, | |
| "learning_rate": 2.040676681083703e-06, | |
| "loss": 0.5195907354354858, | |
| "step": 1519 | |
| }, | |
| { | |
| "epoch": 1.8296207104154125, | |
| "grad_norm": 0.6614371125543239, | |
| "learning_rate": 2.01262339969604e-06, | |
| "loss": 0.5257569551467896, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.8308248043347382, | |
| "grad_norm": 0.6721043989031192, | |
| "learning_rate": 1.9847603192280515e-06, | |
| "loss": 0.5383394956588745, | |
| "step": 1521 | |
| }, | |
| { | |
| "epoch": 1.8320288982540638, | |
| "grad_norm": 0.6516699678388052, | |
| "learning_rate": 1.957087550116765e-06, | |
| "loss": 0.6025786995887756, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 1.8332329921733894, | |
| "grad_norm": 0.7086815397016576, | |
| "learning_rate": 1.929605202044904e-06, | |
| "loss": 0.624287486076355, | |
| "step": 1523 | |
| }, | |
| { | |
| "epoch": 1.8344370860927153, | |
| "grad_norm": 0.7095880324856938, | |
| "learning_rate": 1.9023133839404517e-06, | |
| "loss": 0.609376847743988, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 1.835641180012041, | |
| "grad_norm": 0.6952599672746529, | |
| "learning_rate": 1.875212203976201e-06, | |
| "loss": 0.578833281993866, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 1.8368452739313668, | |
| "grad_norm": 0.6335110011108259, | |
| "learning_rate": 1.8483017695693494e-06, | |
| "loss": 0.49487796425819397, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 1.8380493678506924, | |
| "grad_norm": 0.725572265875903, | |
| "learning_rate": 1.8215821873810601e-06, | |
| "loss": 0.644855260848999, | |
| "step": 1527 | |
| }, | |
| { | |
| "epoch": 1.839253461770018, | |
| "grad_norm": 0.775538378700794, | |
| "learning_rate": 1.7950535633160403e-06, | |
| "loss": 0.5681218504905701, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 1.8404575556893437, | |
| "grad_norm": 0.7179662032057308, | |
| "learning_rate": 1.768716002522125e-06, | |
| "loss": 0.5964992642402649, | |
| "step": 1529 | |
| }, | |
| { | |
| "epoch": 1.8416616496086695, | |
| "grad_norm": 0.6403003850145351, | |
| "learning_rate": 1.7425696093898548e-06, | |
| "loss": 0.4977972209453583, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.8428657435279951, | |
| "grad_norm": 0.7228884489273776, | |
| "learning_rate": 1.7166144875520763e-06, | |
| "loss": 0.5850366950035095, | |
| "step": 1531 | |
| }, | |
| { | |
| "epoch": 1.844069837447321, | |
| "grad_norm": 0.695494118116519, | |
| "learning_rate": 1.6908507398834927e-06, | |
| "loss": 0.5370050668716431, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 1.8452739313666466, | |
| "grad_norm": 0.6719266812533203, | |
| "learning_rate": 1.6652784685003197e-06, | |
| "loss": 0.5704593658447266, | |
| "step": 1533 | |
| }, | |
| { | |
| "epoch": 1.8464780252859723, | |
| "grad_norm": 0.6905237547910387, | |
| "learning_rate": 1.6398977747598243e-06, | |
| "loss": 0.6007063984870911, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 1.847682119205298, | |
| "grad_norm": 0.6724829526221116, | |
| "learning_rate": 1.614708759259942e-06, | |
| "loss": 0.5665451288223267, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 1.8488862131246238, | |
| "grad_norm": 0.8393382950462466, | |
| "learning_rate": 1.5897115218388936e-06, | |
| "loss": 0.6753185987472534, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 1.8500903070439494, | |
| "grad_norm": 0.6494036890210806, | |
| "learning_rate": 1.564906161574764e-06, | |
| "loss": 0.4877506494522095, | |
| "step": 1537 | |
| }, | |
| { | |
| "epoch": 1.8512944009632752, | |
| "grad_norm": 0.68916513427791, | |
| "learning_rate": 1.5402927767851239e-06, | |
| "loss": 0.5553507208824158, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 1.8524984948826009, | |
| "grad_norm": 0.723218474778968, | |
| "learning_rate": 1.5158714650266414e-06, | |
| "loss": 0.5621401071548462, | |
| "step": 1539 | |
| }, | |
| { | |
| "epoch": 1.8537025888019265, | |
| "grad_norm": 0.7184256940054815, | |
| "learning_rate": 1.4916423230946885e-06, | |
| "loss": 0.5810825824737549, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.8549066827212521, | |
| "grad_norm": 0.7104079844650215, | |
| "learning_rate": 1.4676054470229517e-06, | |
| "loss": 0.5943775177001953, | |
| "step": 1541 | |
| }, | |
| { | |
| "epoch": 1.856110776640578, | |
| "grad_norm": 0.6415428432253708, | |
| "learning_rate": 1.443760932083077e-06, | |
| "loss": 0.5965542793273926, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 1.8573148705599036, | |
| "grad_norm": 0.6686962536908114, | |
| "learning_rate": 1.4201088727842648e-06, | |
| "loss": 0.5232040882110596, | |
| "step": 1543 | |
| }, | |
| { | |
| "epoch": 1.8585189644792295, | |
| "grad_norm": 0.7009469795092166, | |
| "learning_rate": 1.3966493628729039e-06, | |
| "loss": 0.6138120293617249, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 1.8597230583985551, | |
| "grad_norm": 0.6208974700635488, | |
| "learning_rate": 1.373382495332215e-06, | |
| "loss": 0.49354854226112366, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 1.8609271523178808, | |
| "grad_norm": 0.692130552786189, | |
| "learning_rate": 1.3503083623818412e-06, | |
| "loss": 0.6931569576263428, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 1.8621312462372064, | |
| "grad_norm": 0.765254055365152, | |
| "learning_rate": 1.3274270554775425e-06, | |
| "loss": 0.7060236930847168, | |
| "step": 1547 | |
| }, | |
| { | |
| "epoch": 1.8633353401565322, | |
| "grad_norm": 0.699732493479322, | |
| "learning_rate": 1.3047386653107784e-06, | |
| "loss": 0.606826663017273, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 1.8645394340758579, | |
| "grad_norm": 0.6674193143847964, | |
| "learning_rate": 1.282243281808393e-06, | |
| "loss": 0.674689531326294, | |
| "step": 1549 | |
| }, | |
| { | |
| "epoch": 1.8657435279951837, | |
| "grad_norm": 0.6577773378574601, | |
| "learning_rate": 1.2599409941322081e-06, | |
| "loss": 0.5309125781059265, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.8669476219145094, | |
| "grad_norm": 0.7046706962824838, | |
| "learning_rate": 1.2378318906787145e-06, | |
| "loss": 0.5600446462631226, | |
| "step": 1551 | |
| }, | |
| { | |
| "epoch": 1.868151715833835, | |
| "grad_norm": 0.6566519822968615, | |
| "learning_rate": 1.2159160590787143e-06, | |
| "loss": 0.5337668657302856, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 1.8693558097531606, | |
| "grad_norm": 0.694918375362827, | |
| "learning_rate": 1.1941935861969455e-06, | |
| "loss": 0.573444664478302, | |
| "step": 1553 | |
| }, | |
| { | |
| "epoch": 1.8705599036724865, | |
| "grad_norm": 0.6117497741372008, | |
| "learning_rate": 1.1726645581317752e-06, | |
| "loss": 0.5877199769020081, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 1.8717639975918121, | |
| "grad_norm": 0.6849649765191942, | |
| "learning_rate": 1.1513290602148174e-06, | |
| "loss": 0.5103924870491028, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 1.872968091511138, | |
| "grad_norm": 0.6666116162172622, | |
| "learning_rate": 1.1301871770106332e-06, | |
| "loss": 0.5621691942214966, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 1.8741721854304636, | |
| "grad_norm": 0.6918792000850436, | |
| "learning_rate": 1.1092389923163915e-06, | |
| "loss": 0.585303783416748, | |
| "step": 1557 | |
| }, | |
| { | |
| "epoch": 1.8753762793497892, | |
| "grad_norm": 0.7628439993607844, | |
| "learning_rate": 1.0884845891614925e-06, | |
| "loss": 0.6140367984771729, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 1.8765803732691149, | |
| "grad_norm": 0.6740049850142918, | |
| "learning_rate": 1.0679240498073118e-06, | |
| "loss": 0.5484222173690796, | |
| "step": 1559 | |
| }, | |
| { | |
| "epoch": 1.8777844671884407, | |
| "grad_norm": 0.7523385412885472, | |
| "learning_rate": 1.0475574557467837e-06, | |
| "loss": 0.5650358200073242, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.8789885611077664, | |
| "grad_norm": 0.6880191801421335, | |
| "learning_rate": 1.0273848877041802e-06, | |
| "loss": 0.47756820917129517, | |
| "step": 1561 | |
| }, | |
| { | |
| "epoch": 1.8801926550270922, | |
| "grad_norm": 0.7132237427539956, | |
| "learning_rate": 1.0074064256347104e-06, | |
| "loss": 0.5750943422317505, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 1.8813967489464178, | |
| "grad_norm": 0.6463908501083647, | |
| "learning_rate": 9.876221487242322e-07, | |
| "loss": 0.5670018196105957, | |
| "step": 1563 | |
| }, | |
| { | |
| "epoch": 1.8826008428657435, | |
| "grad_norm": 0.7603473365673563, | |
| "learning_rate": 9.680321353889576e-07, | |
| "loss": 0.5906625986099243, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 1.883804936785069, | |
| "grad_norm": 0.6859465389776784, | |
| "learning_rate": 9.486364632750878e-07, | |
| "loss": 0.5674991607666016, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 1.885009030704395, | |
| "grad_norm": 0.6769950863239539, | |
| "learning_rate": 9.294352092585779e-07, | |
| "loss": 0.5772174596786499, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 1.8862131246237208, | |
| "grad_norm": 0.7155135900382078, | |
| "learning_rate": 9.104284494447779e-07, | |
| "loss": 0.5847698450088501, | |
| "step": 1567 | |
| }, | |
| { | |
| "epoch": 1.8874172185430464, | |
| "grad_norm": 0.7238855876217561, | |
| "learning_rate": 8.916162591681543e-07, | |
| "loss": 0.5530298948287964, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 1.888621312462372, | |
| "grad_norm": 0.6526045127217193, | |
| "learning_rate": 8.729987129919682e-07, | |
| "loss": 0.5043114423751831, | |
| "step": 1569 | |
| }, | |
| { | |
| "epoch": 1.8898254063816977, | |
| "grad_norm": 0.7391424950916906, | |
| "learning_rate": 8.545758847080143e-07, | |
| "loss": 0.5303754806518555, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.8910295003010233, | |
| "grad_norm": 0.6712333443189676, | |
| "learning_rate": 8.363478473363107e-07, | |
| "loss": 0.577226996421814, | |
| "step": 1571 | |
| }, | |
| { | |
| "epoch": 1.8922335942203492, | |
| "grad_norm": 0.6981141582176953, | |
| "learning_rate": 8.183146731247982e-07, | |
| "loss": 0.5423426628112793, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 1.893437688139675, | |
| "grad_norm": 0.6311937513785054, | |
| "learning_rate": 8.004764335490856e-07, | |
| "loss": 0.48133552074432373, | |
| "step": 1573 | |
| }, | |
| { | |
| "epoch": 1.8946417820590007, | |
| "grad_norm": 0.6789891972188489, | |
| "learning_rate": 7.828331993121163e-07, | |
| "loss": 0.5657028555870056, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 1.8958458759783263, | |
| "grad_norm": 0.7254998928602563, | |
| "learning_rate": 7.653850403439411e-07, | |
| "loss": 0.5331858396530151, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 1.897049969897652, | |
| "grad_norm": 0.6608618601341222, | |
| "learning_rate": 7.481320258014124e-07, | |
| "loss": 0.5427488088607788, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 1.8982540638169776, | |
| "grad_norm": 0.6694799327623477, | |
| "learning_rate": 7.31074224067918e-07, | |
| "loss": 0.6095342636108398, | |
| "step": 1577 | |
| }, | |
| { | |
| "epoch": 1.8994581577363034, | |
| "grad_norm": 0.6099067398142338, | |
| "learning_rate": 7.142117027531092e-07, | |
| "loss": 0.5477356314659119, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 1.9006622516556293, | |
| "grad_norm": 0.7085200907625167, | |
| "learning_rate": 6.975445286926063e-07, | |
| "loss": 0.5689785480499268, | |
| "step": 1579 | |
| }, | |
| { | |
| "epoch": 1.901866345574955, | |
| "grad_norm": 0.7103499006587805, | |
| "learning_rate": 6.810727679477935e-07, | |
| "loss": 0.5828697681427002, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.9030704394942806, | |
| "grad_norm": 0.6301829347830576, | |
| "learning_rate": 6.647964858055133e-07, | |
| "loss": 0.5717979669570923, | |
| "step": 1581 | |
| }, | |
| { | |
| "epoch": 1.9042745334136062, | |
| "grad_norm": 0.636202610900024, | |
| "learning_rate": 6.48715746777806e-07, | |
| "loss": 0.5298157930374146, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 1.9054786273329318, | |
| "grad_norm": 0.6411930882178205, | |
| "learning_rate": 6.328306146016593e-07, | |
| "loss": 0.4738658666610718, | |
| "step": 1583 | |
| }, | |
| { | |
| "epoch": 1.9066827212522577, | |
| "grad_norm": 0.75869674875339, | |
| "learning_rate": 6.171411522387871e-07, | |
| "loss": 0.6281449794769287, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 1.9078868151715835, | |
| "grad_norm": 0.6766625111559003, | |
| "learning_rate": 6.016474218753288e-07, | |
| "loss": 0.6420220136642456, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 1.9090909090909092, | |
| "grad_norm": 0.6592870011567821, | |
| "learning_rate": 5.863494849216444e-07, | |
| "loss": 0.5731959342956543, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 1.9102950030102348, | |
| "grad_norm": 0.7741711986152868, | |
| "learning_rate": 5.712474020120484e-07, | |
| "loss": 0.5774096250534058, | |
| "step": 1587 | |
| }, | |
| { | |
| "epoch": 1.9114990969295604, | |
| "grad_norm": 0.7327164005422871, | |
| "learning_rate": 5.563412330045758e-07, | |
| "loss": 0.6661399602890015, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 1.912703190848886, | |
| "grad_norm": 0.6490076102800001, | |
| "learning_rate": 5.416310369807331e-07, | |
| "loss": 0.5940079092979431, | |
| "step": 1589 | |
| }, | |
| { | |
| "epoch": 1.913907284768212, | |
| "grad_norm": 0.6929070437022665, | |
| "learning_rate": 5.271168722453035e-07, | |
| "loss": 0.5624694228172302, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.9151113786875378, | |
| "grad_norm": 0.7220541731385967, | |
| "learning_rate": 5.127987963260583e-07, | |
| "loss": 0.6027872562408447, | |
| "step": 1591 | |
| }, | |
| { | |
| "epoch": 1.9163154726068634, | |
| "grad_norm": 0.7573965262363941, | |
| "learning_rate": 4.986768659735852e-07, | |
| "loss": 0.5407252907752991, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 1.917519566526189, | |
| "grad_norm": 0.7140587243770241, | |
| "learning_rate": 4.847511371610159e-07, | |
| "loss": 0.5981488823890686, | |
| "step": 1593 | |
| }, | |
| { | |
| "epoch": 1.9187236604455147, | |
| "grad_norm": 0.7092430794499894, | |
| "learning_rate": 4.710216650838317e-07, | |
| "loss": 0.6029732823371887, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 1.9199277543648403, | |
| "grad_norm": 0.7233316220665095, | |
| "learning_rate": 4.5748850415964774e-07, | |
| "loss": 0.48785698413848877, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 1.9211318482841662, | |
| "grad_norm": 0.7018765555724064, | |
| "learning_rate": 4.4415170802797333e-07, | |
| "loss": 0.627527117729187, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 1.922335942203492, | |
| "grad_norm": 0.6960703296753732, | |
| "learning_rate": 4.3101132955002396e-07, | |
| "loss": 0.5867995023727417, | |
| "step": 1597 | |
| }, | |
| { | |
| "epoch": 1.9235400361228177, | |
| "grad_norm": 0.7554260967626275, | |
| "learning_rate": 4.180674208084989e-07, | |
| "loss": 0.546118438243866, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 1.9247441300421433, | |
| "grad_norm": 0.6780941213732067, | |
| "learning_rate": 4.0532003310736475e-07, | |
| "loss": 0.5627431273460388, | |
| "step": 1599 | |
| }, | |
| { | |
| "epoch": 1.925948223961469, | |
| "grad_norm": 0.7461456750112884, | |
| "learning_rate": 3.9276921697169455e-07, | |
| "loss": 0.5974733829498291, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.9271523178807946, | |
| "grad_norm": 0.7331210435274516, | |
| "learning_rate": 3.804150221474179e-07, | |
| "loss": 0.61258864402771, | |
| "step": 1601 | |
| }, | |
| { | |
| "epoch": 1.9283564118001204, | |
| "grad_norm": 0.7291842214281149, | |
| "learning_rate": 3.6825749760113215e-07, | |
| "loss": 0.648840606212616, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 1.9295605057194463, | |
| "grad_norm": 0.7458366591965205, | |
| "learning_rate": 3.5629669151994725e-07, | |
| "loss": 0.6015305519104004, | |
| "step": 1603 | |
| }, | |
| { | |
| "epoch": 1.930764599638772, | |
| "grad_norm": 0.8293947514840635, | |
| "learning_rate": 3.4453265131124677e-07, | |
| "loss": 0.5498949289321899, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 1.9319686935580975, | |
| "grad_norm": 0.6484978877567261, | |
| "learning_rate": 3.3296542360253256e-07, | |
| "loss": 0.5641001462936401, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 1.9331727874774232, | |
| "grad_norm": 0.6847140731160282, | |
| "learning_rate": 3.2159505424122495e-07, | |
| "loss": 0.5579475164413452, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 1.9343768813967488, | |
| "grad_norm": 0.7236561832059804, | |
| "learning_rate": 3.1042158829447385e-07, | |
| "loss": 0.6008574962615967, | |
| "step": 1607 | |
| }, | |
| { | |
| "epoch": 1.9355809753160746, | |
| "grad_norm": 0.6680008850614204, | |
| "learning_rate": 2.9944507004900367e-07, | |
| "loss": 0.5793865919113159, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 1.9367850692354005, | |
| "grad_norm": 0.7226863078514579, | |
| "learning_rate": 2.8866554301091866e-07, | |
| "loss": 0.5802816152572632, | |
| "step": 1609 | |
| }, | |
| { | |
| "epoch": 1.9379891631547261, | |
| "grad_norm": 0.6066089323199368, | |
| "learning_rate": 2.780830499055476e-07, | |
| "loss": 0.41039422154426575, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.9391932570740518, | |
| "grad_norm": 0.7541554507302138, | |
| "learning_rate": 2.6769763267723845e-07, | |
| "loss": 0.7123790979385376, | |
| "step": 1611 | |
| }, | |
| { | |
| "epoch": 1.9403973509933774, | |
| "grad_norm": 0.6539368475453213, | |
| "learning_rate": 2.575093324892364e-07, | |
| "loss": 0.5050459504127502, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 1.941601444912703, | |
| "grad_norm": 0.6362805423411522, | |
| "learning_rate": 2.4751818972350016e-07, | |
| "loss": 0.540932297706604, | |
| "step": 1613 | |
| }, | |
| { | |
| "epoch": 1.9428055388320289, | |
| "grad_norm": 0.7530210700051205, | |
| "learning_rate": 2.377242439805305e-07, | |
| "loss": 0.7373020052909851, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 1.9440096327513547, | |
| "grad_norm": 0.7606614768716228, | |
| "learning_rate": 2.281275340792477e-07, | |
| "loss": 0.6150696277618408, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 1.9452137266706804, | |
| "grad_norm": 0.6667905800819639, | |
| "learning_rate": 2.187280980567863e-07, | |
| "loss": 0.6177890300750732, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 1.946417820590006, | |
| "grad_norm": 0.7280364011212537, | |
| "learning_rate": 2.095259731684007e-07, | |
| "loss": 0.535525381565094, | |
| "step": 1617 | |
| }, | |
| { | |
| "epoch": 1.9476219145093316, | |
| "grad_norm": 0.6612643727912537, | |
| "learning_rate": 2.0052119588727103e-07, | |
| "loss": 0.5701784491539001, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 1.9488260084286573, | |
| "grad_norm": 0.6930350525511971, | |
| "learning_rate": 1.917138019043918e-07, | |
| "loss": 0.5127325057983398, | |
| "step": 1619 | |
| }, | |
| { | |
| "epoch": 1.9500301023479831, | |
| "grad_norm": 0.6645178871748494, | |
| "learning_rate": 1.8310382612841125e-07, | |
| "loss": 0.5932685136795044, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.951234196267309, | |
| "grad_norm": 0.6600103793645039, | |
| "learning_rate": 1.7469130268549238e-07, | |
| "loss": 0.5705520510673523, | |
| "step": 1621 | |
| }, | |
| { | |
| "epoch": 1.9524382901866346, | |
| "grad_norm": 0.7037195958313917, | |
| "learning_rate": 1.6647626491919088e-07, | |
| "loss": 0.6251012086868286, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 1.9536423841059603, | |
| "grad_norm": 0.7203644868401232, | |
| "learning_rate": 1.5845874539032192e-07, | |
| "loss": 0.6126412153244019, | |
| "step": 1623 | |
| }, | |
| { | |
| "epoch": 1.9548464780252859, | |
| "grad_norm": 0.6893511239479428, | |
| "learning_rate": 1.5063877587681019e-07, | |
| "loss": 0.6284942626953125, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 1.9560505719446115, | |
| "grad_norm": 0.7277088854826446, | |
| "learning_rate": 1.4301638737358459e-07, | |
| "loss": 0.5494322776794434, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 1.9572546658639374, | |
| "grad_norm": 0.7044708997407658, | |
| "learning_rate": 1.3559161009246146e-07, | |
| "loss": 0.4695800542831421, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 1.9584587597832632, | |
| "grad_norm": 0.6464366555921628, | |
| "learning_rate": 1.283644734619893e-07, | |
| "loss": 0.5801255106925964, | |
| "step": 1627 | |
| }, | |
| { | |
| "epoch": 1.9596628537025889, | |
| "grad_norm": 0.6572737348994054, | |
| "learning_rate": 1.2133500612737103e-07, | |
| "loss": 0.5532472133636475, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 1.9608669476219145, | |
| "grad_norm": 0.6546752224566054, | |
| "learning_rate": 1.1450323595034174e-07, | |
| "loss": 0.5728014707565308, | |
| "step": 1629 | |
| }, | |
| { | |
| "epoch": 1.9620710415412401, | |
| "grad_norm": 0.7098695274037261, | |
| "learning_rate": 1.0786919000903562e-07, | |
| "loss": 0.6312659978866577, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.963275135460566, | |
| "grad_norm": 0.6431445514270803, | |
| "learning_rate": 1.0143289459790816e-07, | |
| "loss": 0.5799425840377808, | |
| "step": 1631 | |
| }, | |
| { | |
| "epoch": 1.9644792293798916, | |
| "grad_norm": 0.6719495518124794, | |
| "learning_rate": 9.519437522760299e-08, | |
| "loss": 0.5714237689971924, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 1.9656833232992175, | |
| "grad_norm": 0.752533416502612, | |
| "learning_rate": 8.915365662488518e-08, | |
| "loss": 0.5689437985420227, | |
| "step": 1633 | |
| }, | |
| { | |
| "epoch": 1.966887417218543, | |
| "grad_norm": 0.7692727283179666, | |
| "learning_rate": 8.331076273250249e-08, | |
| "loss": 0.6392886638641357, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 1.9680915111378687, | |
| "grad_norm": 0.7336589415290271, | |
| "learning_rate": 7.766571670913547e-08, | |
| "loss": 0.5719316005706787, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 1.9692956050571944, | |
| "grad_norm": 0.6838298911100253, | |
| "learning_rate": 7.221854092926971e-08, | |
| "loss": 0.5827243328094482, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 1.9704996989765202, | |
| "grad_norm": 0.7686224124765682, | |
| "learning_rate": 6.696925698311817e-08, | |
| "loss": 0.6425184011459351, | |
| "step": 1637 | |
| }, | |
| { | |
| "epoch": 1.9717037928958459, | |
| "grad_norm": 0.6532559866884581, | |
| "learning_rate": 6.191788567654344e-08, | |
| "loss": 0.5946167707443237, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 1.9729078868151717, | |
| "grad_norm": 0.6312344261575024, | |
| "learning_rate": 5.706444703096336e-08, | |
| "loss": 0.5090816617012024, | |
| "step": 1639 | |
| }, | |
| { | |
| "epoch": 1.9741119807344973, | |
| "grad_norm": 0.7676146358721873, | |
| "learning_rate": 5.240896028327891e-08, | |
| "loss": 0.579215407371521, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.975316074653823, | |
| "grad_norm": 0.6728133395765841, | |
| "learning_rate": 4.7951443885807524e-08, | |
| "loss": 0.5652763247489929, | |
| "step": 1641 | |
| }, | |
| { | |
| "epoch": 1.9765201685731486, | |
| "grad_norm": 0.6432580000680586, | |
| "learning_rate": 4.3691915506177686e-08, | |
| "loss": 0.5408779382705688, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 1.9777242624924745, | |
| "grad_norm": 0.6315126229459764, | |
| "learning_rate": 3.963039202730112e-08, | |
| "loss": 0.5882087349891663, | |
| "step": 1643 | |
| }, | |
| { | |
| "epoch": 1.9789283564118, | |
| "grad_norm": 0.7011158641139948, | |
| "learning_rate": 3.576688954727847e-08, | |
| "loss": 0.5176501274108887, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 1.980132450331126, | |
| "grad_norm": 0.6587097052056431, | |
| "learning_rate": 3.210142337932709e-08, | |
| "loss": 0.5108386874198914, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 1.9813365442504516, | |
| "grad_norm": 0.6425909595737344, | |
| "learning_rate": 2.8634008051758864e-08, | |
| "loss": 0.5777048468589783, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 1.9825406381697772, | |
| "grad_norm": 0.7147004863851809, | |
| "learning_rate": 2.536465730788029e-08, | |
| "loss": 0.500884473323822, | |
| "step": 1647 | |
| }, | |
| { | |
| "epoch": 1.9837447320891028, | |
| "grad_norm": 0.6737989717991144, | |
| "learning_rate": 2.229338410597026e-08, | |
| "loss": 0.5754954218864441, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 1.9849488260084287, | |
| "grad_norm": 0.7131714825607306, | |
| "learning_rate": 1.9420200619207907e-08, | |
| "loss": 0.5816112160682678, | |
| "step": 1649 | |
| }, | |
| { | |
| "epoch": 1.9861529199277543, | |
| "grad_norm": 0.7554088529786651, | |
| "learning_rate": 1.6745118235628188e-08, | |
| "loss": 0.6610931158065796, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.9873570138470802, | |
| "grad_norm": 0.7420931407483101, | |
| "learning_rate": 1.4268147558088585e-08, | |
| "loss": 0.5348343849182129, | |
| "step": 1651 | |
| }, | |
| { | |
| "epoch": 1.9885611077664058, | |
| "grad_norm": 0.7690940969965758, | |
| "learning_rate": 1.1989298404213588e-08, | |
| "loss": 0.5157413482666016, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 1.9897652016857315, | |
| "grad_norm": 0.6882882292897665, | |
| "learning_rate": 9.908579806361396e-09, | |
| "loss": 0.4572542905807495, | |
| "step": 1653 | |
| }, | |
| { | |
| "epoch": 1.990969295605057, | |
| "grad_norm": 0.6899079935771424, | |
| "learning_rate": 8.026000011596146e-09, | |
| "loss": 0.6765772700309753, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 1.992173389524383, | |
| "grad_norm": 0.6843041221046693, | |
| "learning_rate": 6.341566481626871e-09, | |
| "loss": 0.6102558374404907, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 1.9933774834437086, | |
| "grad_norm": 0.6850570474611285, | |
| "learning_rate": 4.855285892813033e-09, | |
| "loss": 0.5829460024833679, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 1.9945815773630344, | |
| "grad_norm": 0.6410933816390361, | |
| "learning_rate": 3.567164136120127e-09, | |
| "loss": 0.5028737187385559, | |
| "step": 1657 | |
| }, | |
| { | |
| "epoch": 1.99578567128236, | |
| "grad_norm": 0.65338875182292, | |
| "learning_rate": 2.47720631710302e-09, | |
| "loss": 0.5529558658599854, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 1.9969897652016857, | |
| "grad_norm": 0.6618217221632864, | |
| "learning_rate": 1.5854167558670975e-09, | |
| "loss": 0.5901626348495483, | |
| "step": 1659 | |
| }, | |
| { | |
| "epoch": 1.9981938591210113, | |
| "grad_norm": 0.6620340323637777, | |
| "learning_rate": 8.917989870849131e-10, | |
| "loss": 0.55892014503479, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.9993979530403372, | |
| "grad_norm": 0.7584505389100632, | |
| "learning_rate": 3.9635575994623196e-10, | |
| "loss": 0.613528311252594, | |
| "step": 1661 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.897855344686248, | |
| "learning_rate": 9.908903817468229e-11, | |
| "loss": 0.5595951080322266, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 1662, | |
| "total_flos": 1137613352534016.0, | |
| "train_loss": 0.6870017016730153, | |
| "train_runtime": 15224.812, | |
| "train_samples_per_second": 0.873, | |
| "train_steps_per_second": 0.109 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1662, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1137613352534016.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |