Text Generation
Transformers
Safetensors
qwen2
Generated from Trainer
open-r1
trl
sft
conversational
text-generation-inference
Instructions to use flyingbugs/Qwen2.5-Math-7B-generalthought-random with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use flyingbugs/Qwen2.5-Math-7B-generalthought-random with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="flyingbugs/Qwen2.5-Math-7B-generalthought-random")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("flyingbugs/Qwen2.5-Math-7B-generalthought-random")
model = AutoModelForCausalLM.from_pretrained("flyingbugs/Qwen2.5-Math-7B-generalthought-random")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use flyingbugs/Qwen2.5-Math-7B-generalthought-random with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "flyingbugs/Qwen2.5-Math-7B-generalthought-random"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "flyingbugs/Qwen2.5-Math-7B-generalthought-random",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/flyingbugs/Qwen2.5-Math-7B-generalthought-random
- SGLang
How to use flyingbugs/Qwen2.5-Math-7B-generalthought-random with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "flyingbugs/Qwen2.5-Math-7B-generalthought-random" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "flyingbugs/Qwen2.5-Math-7B-generalthought-random",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "flyingbugs/Qwen2.5-Math-7B-generalthought-random" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "flyingbugs/Qwen2.5-Math-7B-generalthought-random",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use flyingbugs/Qwen2.5-Math-7B-generalthought-random with Docker Model Runner:
docker model run hf.co/flyingbugs/Qwen2.5-Math-7B-generalthought-random
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9962264150943394, | |
| "eval_steps": 500, | |
| "global_step": 1986, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0015094339622641509, | |
| "grad_norm": 42.84351638901055, | |
| "learning_rate": 2.5125628140703517e-07, | |
| "loss": 11.7003, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0030188679245283017, | |
| "grad_norm": 40.58796594063912, | |
| "learning_rate": 5.025125628140703e-07, | |
| "loss": 11.756, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.004528301886792453, | |
| "grad_norm": 42.144032983881885, | |
| "learning_rate": 7.537688442211055e-07, | |
| "loss": 11.7907, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0060377358490566035, | |
| "grad_norm": 45.72964350820413, | |
| "learning_rate": 1.0050251256281407e-06, | |
| "loss": 11.6821, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.007547169811320755, | |
| "grad_norm": 40.81047058078221, | |
| "learning_rate": 1.256281407035176e-06, | |
| "loss": 11.8394, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.009056603773584906, | |
| "grad_norm": 42.899640519128475, | |
| "learning_rate": 1.507537688442211e-06, | |
| "loss": 11.6455, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.010566037735849057, | |
| "grad_norm": 40.231481376946206, | |
| "learning_rate": 1.7587939698492463e-06, | |
| "loss": 11.7821, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.012075471698113207, | |
| "grad_norm": 43.18420575417782, | |
| "learning_rate": 2.0100502512562813e-06, | |
| "loss": 11.51, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.013584905660377358, | |
| "grad_norm": 45.40072785894473, | |
| "learning_rate": 2.261306532663317e-06, | |
| "loss": 11.4937, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.01509433962264151, | |
| "grad_norm": 60.478523691312404, | |
| "learning_rate": 2.512562814070352e-06, | |
| "loss": 10.4386, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01660377358490566, | |
| "grad_norm": 55.94613742853918, | |
| "learning_rate": 2.7638190954773874e-06, | |
| "loss": 10.2241, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.018113207547169812, | |
| "grad_norm": 54.818513224626024, | |
| "learning_rate": 3.015075376884422e-06, | |
| "loss": 10.6026, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.019622641509433963, | |
| "grad_norm": 84.73466989691137, | |
| "learning_rate": 3.2663316582914575e-06, | |
| "loss": 6.2547, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.021132075471698115, | |
| "grad_norm": 76.688984609838, | |
| "learning_rate": 3.5175879396984926e-06, | |
| "loss": 5.6541, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.022641509433962263, | |
| "grad_norm": 76.71708348082493, | |
| "learning_rate": 3.7688442211055276e-06, | |
| "loss": 5.5968, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.024150943396226414, | |
| "grad_norm": 57.53302315459275, | |
| "learning_rate": 4.020100502512563e-06, | |
| "loss": 4.4551, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.025660377358490565, | |
| "grad_norm": 20.381484799022463, | |
| "learning_rate": 4.271356783919598e-06, | |
| "loss": 2.6246, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.027169811320754716, | |
| "grad_norm": 7.4120018391528975, | |
| "learning_rate": 4.522613065326634e-06, | |
| "loss": 1.9439, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.028679245283018868, | |
| "grad_norm": 7.246473146417547, | |
| "learning_rate": 4.773869346733668e-06, | |
| "loss": 2.0671, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.03018867924528302, | |
| "grad_norm": 5.317897222021412, | |
| "learning_rate": 5.025125628140704e-06, | |
| "loss": 1.8909, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03169811320754717, | |
| "grad_norm": 4.371583867174312, | |
| "learning_rate": 5.276381909547739e-06, | |
| "loss": 1.706, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.03320754716981132, | |
| "grad_norm": 3.7247484208320563, | |
| "learning_rate": 5.527638190954775e-06, | |
| "loss": 1.6542, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.03471698113207547, | |
| "grad_norm": 3.5432475784664277, | |
| "learning_rate": 5.778894472361809e-06, | |
| "loss": 1.8566, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.036226415094339624, | |
| "grad_norm": 2.2793446224909015, | |
| "learning_rate": 6.030150753768844e-06, | |
| "loss": 1.6225, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.03773584905660377, | |
| "grad_norm": 2.976629270506673, | |
| "learning_rate": 6.2814070351758795e-06, | |
| "loss": 1.8027, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.03924528301886793, | |
| "grad_norm": 1.9292710465468252, | |
| "learning_rate": 6.532663316582915e-06, | |
| "loss": 1.6694, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.040754716981132075, | |
| "grad_norm": 1.8235949344084434, | |
| "learning_rate": 6.7839195979899505e-06, | |
| "loss": 1.3449, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.04226415094339623, | |
| "grad_norm": 1.2509325845880814, | |
| "learning_rate": 7.035175879396985e-06, | |
| "loss": 1.4428, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.04377358490566038, | |
| "grad_norm": 1.1446202521586268, | |
| "learning_rate": 7.28643216080402e-06, | |
| "loss": 1.2865, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.045283018867924525, | |
| "grad_norm": 1.036741546377771, | |
| "learning_rate": 7.537688442211055e-06, | |
| "loss": 1.4755, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.04679245283018868, | |
| "grad_norm": 1.0321415589665388, | |
| "learning_rate": 7.788944723618092e-06, | |
| "loss": 1.5658, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.04830188679245283, | |
| "grad_norm": 0.8390420469610276, | |
| "learning_rate": 8.040201005025125e-06, | |
| "loss": 1.2749, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.04981132075471698, | |
| "grad_norm": 0.8773961154600026, | |
| "learning_rate": 8.291457286432161e-06, | |
| "loss": 1.3381, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.05132075471698113, | |
| "grad_norm": 0.688788854923463, | |
| "learning_rate": 8.542713567839196e-06, | |
| "loss": 1.2844, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.052830188679245285, | |
| "grad_norm": 0.697731142261381, | |
| "learning_rate": 8.793969849246232e-06, | |
| "loss": 1.0224, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.05433962264150943, | |
| "grad_norm": 0.7481698939939284, | |
| "learning_rate": 9.045226130653267e-06, | |
| "loss": 1.3223, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.05584905660377359, | |
| "grad_norm": 0.713480143085657, | |
| "learning_rate": 9.296482412060301e-06, | |
| "loss": 1.3433, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.057358490566037736, | |
| "grad_norm": 0.5516147418737927, | |
| "learning_rate": 9.547738693467337e-06, | |
| "loss": 1.1849, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.05886792452830188, | |
| "grad_norm": 0.5489764999546655, | |
| "learning_rate": 9.798994974874372e-06, | |
| "loss": 1.221, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.06037735849056604, | |
| "grad_norm": 0.5976907585489283, | |
| "learning_rate": 1.0050251256281408e-05, | |
| "loss": 1.1651, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.061886792452830186, | |
| "grad_norm": 0.5729987457779835, | |
| "learning_rate": 1.0301507537688443e-05, | |
| "loss": 1.2038, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.06339622641509433, | |
| "grad_norm": 0.849789141484363, | |
| "learning_rate": 1.0552763819095479e-05, | |
| "loss": 0.975, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0649056603773585, | |
| "grad_norm": 0.5378335024110182, | |
| "learning_rate": 1.0804020100502512e-05, | |
| "loss": 1.1693, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.06641509433962264, | |
| "grad_norm": 0.4605169598599595, | |
| "learning_rate": 1.105527638190955e-05, | |
| "loss": 1.0198, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.06792452830188679, | |
| "grad_norm": 0.4716228872243182, | |
| "learning_rate": 1.1306532663316583e-05, | |
| "loss": 1.0925, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.06943396226415094, | |
| "grad_norm": 0.4669934577396619, | |
| "learning_rate": 1.1557788944723619e-05, | |
| "loss": 1.1274, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.0709433962264151, | |
| "grad_norm": 0.4621587559628666, | |
| "learning_rate": 1.1809045226130654e-05, | |
| "loss": 1.1123, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.07245283018867925, | |
| "grad_norm": 0.7302537885626854, | |
| "learning_rate": 1.2060301507537688e-05, | |
| "loss": 1.0177, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0739622641509434, | |
| "grad_norm": 0.4731493256116344, | |
| "learning_rate": 1.2311557788944725e-05, | |
| "loss": 1.0597, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.07547169811320754, | |
| "grad_norm": 0.44703159598258635, | |
| "learning_rate": 1.2562814070351759e-05, | |
| "loss": 1.1184, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07698113207547169, | |
| "grad_norm": 0.37947111567149666, | |
| "learning_rate": 1.2814070351758795e-05, | |
| "loss": 0.9152, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.07849056603773585, | |
| "grad_norm": 0.4138677678629633, | |
| "learning_rate": 1.306532663316583e-05, | |
| "loss": 0.9698, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.4210863662592338, | |
| "learning_rate": 1.3316582914572864e-05, | |
| "loss": 1.0264, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.08150943396226415, | |
| "grad_norm": 0.4657161680493391, | |
| "learning_rate": 1.3567839195979901e-05, | |
| "loss": 1.2491, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.0830188679245283, | |
| "grad_norm": 0.4322144912028561, | |
| "learning_rate": 1.3819095477386935e-05, | |
| "loss": 1.0837, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.08452830188679246, | |
| "grad_norm": 0.4895899695520348, | |
| "learning_rate": 1.407035175879397e-05, | |
| "loss": 0.9843, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.0860377358490566, | |
| "grad_norm": 0.38329697027276266, | |
| "learning_rate": 1.4321608040201007e-05, | |
| "loss": 0.9874, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.08754716981132075, | |
| "grad_norm": 0.3728506554128249, | |
| "learning_rate": 1.457286432160804e-05, | |
| "loss": 1.0505, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.0890566037735849, | |
| "grad_norm": 0.42582396506199066, | |
| "learning_rate": 1.4824120603015077e-05, | |
| "loss": 1.0177, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.09056603773584905, | |
| "grad_norm": 0.4039105071166847, | |
| "learning_rate": 1.507537688442211e-05, | |
| "loss": 1.0522, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.09207547169811321, | |
| "grad_norm": 0.39048018099504467, | |
| "learning_rate": 1.5326633165829146e-05, | |
| "loss": 1.1106, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.09358490566037736, | |
| "grad_norm": 0.3598639795497603, | |
| "learning_rate": 1.5577889447236183e-05, | |
| "loss": 0.9756, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.09509433962264151, | |
| "grad_norm": 0.33978834353660775, | |
| "learning_rate": 1.5829145728643217e-05, | |
| "loss": 0.9874, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.09660377358490566, | |
| "grad_norm": 0.3571676859401004, | |
| "learning_rate": 1.608040201005025e-05, | |
| "loss": 0.9647, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.09811320754716982, | |
| "grad_norm": 0.3713953220516257, | |
| "learning_rate": 1.6331658291457288e-05, | |
| "loss": 1.0445, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.09962264150943397, | |
| "grad_norm": 0.3708364360600743, | |
| "learning_rate": 1.6582914572864322e-05, | |
| "loss": 1.0168, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.10113207547169811, | |
| "grad_norm": 0.3673356575030503, | |
| "learning_rate": 1.683417085427136e-05, | |
| "loss": 1.0713, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.10264150943396226, | |
| "grad_norm": 0.3817704093554328, | |
| "learning_rate": 1.7085427135678393e-05, | |
| "loss": 1.1859, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.10415094339622641, | |
| "grad_norm": 0.35553437445641883, | |
| "learning_rate": 1.7336683417085427e-05, | |
| "loss": 0.9106, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.10566037735849057, | |
| "grad_norm": 0.3549271646935127, | |
| "learning_rate": 1.7587939698492464e-05, | |
| "loss": 0.9792, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.10716981132075472, | |
| "grad_norm": 0.34909558103973126, | |
| "learning_rate": 1.7839195979899497e-05, | |
| "loss": 0.9646, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.10867924528301887, | |
| "grad_norm": 0.3777705845997939, | |
| "learning_rate": 1.8090452261306535e-05, | |
| "loss": 1.0047, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.11018867924528301, | |
| "grad_norm": 0.34807538051544334, | |
| "learning_rate": 1.834170854271357e-05, | |
| "loss": 1.0855, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.11169811320754718, | |
| "grad_norm": 0.3253777447991679, | |
| "learning_rate": 1.8592964824120602e-05, | |
| "loss": 0.9121, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.11320754716981132, | |
| "grad_norm": 0.3734270413143541, | |
| "learning_rate": 1.884422110552764e-05, | |
| "loss": 1.0115, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.11471698113207547, | |
| "grad_norm": 0.3746609650254723, | |
| "learning_rate": 1.9095477386934673e-05, | |
| "loss": 1.0999, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.11622641509433962, | |
| "grad_norm": 0.3218308743491144, | |
| "learning_rate": 1.934673366834171e-05, | |
| "loss": 0.9628, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.11773584905660377, | |
| "grad_norm": 0.378328515287124, | |
| "learning_rate": 1.9597989949748744e-05, | |
| "loss": 0.8928, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.11924528301886793, | |
| "grad_norm": 0.385332164332375, | |
| "learning_rate": 1.984924623115578e-05, | |
| "loss": 1.11, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.12075471698113208, | |
| "grad_norm": 0.36828879580340973, | |
| "learning_rate": 2.0100502512562815e-05, | |
| "loss": 0.9687, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.12226415094339622, | |
| "grad_norm": 0.37180628983371017, | |
| "learning_rate": 2.035175879396985e-05, | |
| "loss": 0.9488, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.12377358490566037, | |
| "grad_norm": 0.3655463598030299, | |
| "learning_rate": 2.0603015075376886e-05, | |
| "loss": 0.9635, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.12528301886792453, | |
| "grad_norm": 0.3973839411165058, | |
| "learning_rate": 2.085427135678392e-05, | |
| "loss": 0.9539, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.12679245283018867, | |
| "grad_norm": 0.8889038300232109, | |
| "learning_rate": 2.1105527638190957e-05, | |
| "loss": 0.9591, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.12830188679245283, | |
| "grad_norm": 0.4159535642199707, | |
| "learning_rate": 2.135678391959799e-05, | |
| "loss": 0.9782, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.129811320754717, | |
| "grad_norm": 0.4055951184040692, | |
| "learning_rate": 2.1608040201005025e-05, | |
| "loss": 0.9885, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.13132075471698113, | |
| "grad_norm": 0.3740604222184423, | |
| "learning_rate": 2.1859296482412062e-05, | |
| "loss": 0.9783, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.1328301886792453, | |
| "grad_norm": 0.41043743276277883, | |
| "learning_rate": 2.21105527638191e-05, | |
| "loss": 0.9789, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.13433962264150942, | |
| "grad_norm": 0.3744677884389674, | |
| "learning_rate": 2.2361809045226133e-05, | |
| "loss": 0.8614, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.13584905660377358, | |
| "grad_norm": 0.3674741281297594, | |
| "learning_rate": 2.2613065326633167e-05, | |
| "loss": 0.8971, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.13735849056603774, | |
| "grad_norm": 0.41628170180596963, | |
| "learning_rate": 2.28643216080402e-05, | |
| "loss": 0.8112, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.13886792452830188, | |
| "grad_norm": 0.4508017917622187, | |
| "learning_rate": 2.3115577889447238e-05, | |
| "loss": 1.0037, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.14037735849056604, | |
| "grad_norm": 0.5170078422537966, | |
| "learning_rate": 2.3366834170854275e-05, | |
| "loss": 1.0547, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.1418867924528302, | |
| "grad_norm": 0.5212510340969881, | |
| "learning_rate": 2.361809045226131e-05, | |
| "loss": 1.0543, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.14339622641509434, | |
| "grad_norm": 0.6121067181622158, | |
| "learning_rate": 2.3869346733668342e-05, | |
| "loss": 0.9061, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.1449056603773585, | |
| "grad_norm": 0.5238865642185518, | |
| "learning_rate": 2.4120603015075376e-05, | |
| "loss": 0.9103, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.14641509433962263, | |
| "grad_norm": 0.4171766577781957, | |
| "learning_rate": 2.4371859296482413e-05, | |
| "loss": 0.8452, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.1479245283018868, | |
| "grad_norm": 0.5448433488826973, | |
| "learning_rate": 2.462311557788945e-05, | |
| "loss": 1.0022, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.14943396226415095, | |
| "grad_norm": 1.0734992777741617, | |
| "learning_rate": 2.4874371859296484e-05, | |
| "loss": 0.943, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.1509433962264151, | |
| "grad_norm": 0.49449168180642084, | |
| "learning_rate": 2.5125628140703518e-05, | |
| "loss": 0.9584, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.15245283018867925, | |
| "grad_norm": 0.5088881724419568, | |
| "learning_rate": 2.5376884422110552e-05, | |
| "loss": 0.8866, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.15396226415094338, | |
| "grad_norm": 0.508102690727942, | |
| "learning_rate": 2.562814070351759e-05, | |
| "loss": 0.9326, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.15547169811320755, | |
| "grad_norm": 0.4653650117166445, | |
| "learning_rate": 2.5879396984924626e-05, | |
| "loss": 0.953, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.1569811320754717, | |
| "grad_norm": 0.46750559798962293, | |
| "learning_rate": 2.613065326633166e-05, | |
| "loss": 0.8609, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.15849056603773584, | |
| "grad_norm": 0.6835796303360562, | |
| "learning_rate": 2.6381909547738694e-05, | |
| "loss": 0.8787, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.5141911707155195, | |
| "learning_rate": 2.6633165829145728e-05, | |
| "loss": 0.9585, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.16150943396226414, | |
| "grad_norm": 0.4883615708108046, | |
| "learning_rate": 2.6884422110552765e-05, | |
| "loss": 0.9979, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.1630188679245283, | |
| "grad_norm": 0.5190216260067821, | |
| "learning_rate": 2.7135678391959802e-05, | |
| "loss": 0.865, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.16452830188679246, | |
| "grad_norm": 0.408410057009239, | |
| "learning_rate": 2.738693467336684e-05, | |
| "loss": 0.9051, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.1660377358490566, | |
| "grad_norm": 0.4906888730107437, | |
| "learning_rate": 2.763819095477387e-05, | |
| "loss": 0.8904, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.16754716981132076, | |
| "grad_norm": 0.42357291845895245, | |
| "learning_rate": 2.7889447236180903e-05, | |
| "loss": 0.8314, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.16905660377358492, | |
| "grad_norm": 1.440741980111514, | |
| "learning_rate": 2.814070351758794e-05, | |
| "loss": 1.0156, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.17056603773584905, | |
| "grad_norm": 0.5124296806329448, | |
| "learning_rate": 2.8391959798994978e-05, | |
| "loss": 0.8618, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.1720754716981132, | |
| "grad_norm": 0.5963788620163426, | |
| "learning_rate": 2.8643216080402015e-05, | |
| "loss": 0.964, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.17358490566037735, | |
| "grad_norm": 0.45527147444306076, | |
| "learning_rate": 2.8894472361809045e-05, | |
| "loss": 0.8331, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.1750943396226415, | |
| "grad_norm": 0.5115219921915511, | |
| "learning_rate": 2.914572864321608e-05, | |
| "loss": 0.9331, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.17660377358490567, | |
| "grad_norm": 4.556437650890275, | |
| "learning_rate": 2.9396984924623116e-05, | |
| "loss": 0.962, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.1781132075471698, | |
| "grad_norm": 0.5312376654687548, | |
| "learning_rate": 2.9648241206030153e-05, | |
| "loss": 0.8834, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.17962264150943397, | |
| "grad_norm": 0.45200310784615344, | |
| "learning_rate": 2.989949748743719e-05, | |
| "loss": 0.9321, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.1811320754716981, | |
| "grad_norm": 0.49479911246054076, | |
| "learning_rate": 3.015075376884422e-05, | |
| "loss": 0.8612, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.18264150943396226, | |
| "grad_norm": 0.5356223053603791, | |
| "learning_rate": 3.0402010050251255e-05, | |
| "loss": 0.9651, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.18415094339622642, | |
| "grad_norm": 0.4775869273954035, | |
| "learning_rate": 3.065326633165829e-05, | |
| "loss": 0.9167, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.18566037735849056, | |
| "grad_norm": 0.38759830910589615, | |
| "learning_rate": 3.0904522613065326e-05, | |
| "loss": 0.7914, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.18716981132075472, | |
| "grad_norm": 0.505778754703735, | |
| "learning_rate": 3.1155778894472366e-05, | |
| "loss": 0.9004, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.18867924528301888, | |
| "grad_norm": 1.5610813600582665, | |
| "learning_rate": 3.14070351758794e-05, | |
| "loss": 0.823, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.19018867924528302, | |
| "grad_norm": 0.5033816078558214, | |
| "learning_rate": 3.1658291457286434e-05, | |
| "loss": 0.9111, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.19169811320754718, | |
| "grad_norm": 0.5411937400048007, | |
| "learning_rate": 3.190954773869347e-05, | |
| "loss": 0.8375, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.1932075471698113, | |
| "grad_norm": 0.4287407951685938, | |
| "learning_rate": 3.21608040201005e-05, | |
| "loss": 0.8805, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.19471698113207547, | |
| "grad_norm": 0.5109121770551573, | |
| "learning_rate": 3.241206030150754e-05, | |
| "loss": 0.9597, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.19622641509433963, | |
| "grad_norm": 0.4260153095022451, | |
| "learning_rate": 3.2663316582914576e-05, | |
| "loss": 0.998, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.19773584905660377, | |
| "grad_norm": 0.44701240738286824, | |
| "learning_rate": 3.291457286432161e-05, | |
| "loss": 0.8782, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.19924528301886793, | |
| "grad_norm": 0.44157622535461144, | |
| "learning_rate": 3.3165829145728643e-05, | |
| "loss": 0.8841, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.20075471698113206, | |
| "grad_norm": 0.4801398431121367, | |
| "learning_rate": 3.341708542713568e-05, | |
| "loss": 0.8644, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.20226415094339623, | |
| "grad_norm": 0.36530196405233833, | |
| "learning_rate": 3.366834170854272e-05, | |
| "loss": 0.8119, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.2037735849056604, | |
| "grad_norm": 0.5570490282592016, | |
| "learning_rate": 3.391959798994975e-05, | |
| "loss": 0.8855, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.20528301886792452, | |
| "grad_norm": 0.3986151374909903, | |
| "learning_rate": 3.4170854271356785e-05, | |
| "loss": 0.8876, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.20679245283018868, | |
| "grad_norm": 0.43926099950410996, | |
| "learning_rate": 3.442211055276382e-05, | |
| "loss": 0.8575, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.20830188679245282, | |
| "grad_norm": 0.45592611275226874, | |
| "learning_rate": 3.467336683417085e-05, | |
| "loss": 0.8576, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.20981132075471698, | |
| "grad_norm": 0.5049788202212411, | |
| "learning_rate": 3.4924623115577894e-05, | |
| "loss": 0.9399, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.21132075471698114, | |
| "grad_norm": 0.3968817831624458, | |
| "learning_rate": 3.517587939698493e-05, | |
| "loss": 0.884, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.21283018867924527, | |
| "grad_norm": 0.5034047348280332, | |
| "learning_rate": 3.542713567839196e-05, | |
| "loss": 0.9453, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.21433962264150944, | |
| "grad_norm": 0.38193650268073553, | |
| "learning_rate": 3.5678391959798995e-05, | |
| "loss": 0.8446, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.2158490566037736, | |
| "grad_norm": 0.47293401837423554, | |
| "learning_rate": 3.592964824120603e-05, | |
| "loss": 0.8357, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.21735849056603773, | |
| "grad_norm": 0.5128727207732378, | |
| "learning_rate": 3.618090452261307e-05, | |
| "loss": 0.8878, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.2188679245283019, | |
| "grad_norm": 0.4490745977510919, | |
| "learning_rate": 3.64321608040201e-05, | |
| "loss": 0.7525, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.22037735849056603, | |
| "grad_norm": 0.5285464668249072, | |
| "learning_rate": 3.668341708542714e-05, | |
| "loss": 0.8317, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.2218867924528302, | |
| "grad_norm": 0.4684615864196027, | |
| "learning_rate": 3.693467336683417e-05, | |
| "loss": 0.8679, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.22339622641509435, | |
| "grad_norm": 0.4384754233570824, | |
| "learning_rate": 3.7185929648241204e-05, | |
| "loss": 0.9187, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.22490566037735849, | |
| "grad_norm": 0.4959217477046417, | |
| "learning_rate": 3.7437185929648245e-05, | |
| "loss": 0.9176, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.22641509433962265, | |
| "grad_norm": 0.513351882366079, | |
| "learning_rate": 3.768844221105528e-05, | |
| "loss": 0.9131, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.22792452830188678, | |
| "grad_norm": 0.46013492291199676, | |
| "learning_rate": 3.793969849246231e-05, | |
| "loss": 0.8977, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.22943396226415094, | |
| "grad_norm": 0.4228108610077172, | |
| "learning_rate": 3.8190954773869346e-05, | |
| "loss": 0.8378, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.2309433962264151, | |
| "grad_norm": 0.5211386844607362, | |
| "learning_rate": 3.844221105527639e-05, | |
| "loss": 0.8609, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.23245283018867924, | |
| "grad_norm": 0.42815407260277805, | |
| "learning_rate": 3.869346733668342e-05, | |
| "loss": 0.797, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.2339622641509434, | |
| "grad_norm": 0.44022812612987366, | |
| "learning_rate": 3.8944723618090455e-05, | |
| "loss": 0.8717, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.23547169811320753, | |
| "grad_norm": 0.4131388371605724, | |
| "learning_rate": 3.919597989949749e-05, | |
| "loss": 0.812, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.2369811320754717, | |
| "grad_norm": 0.6710269473568021, | |
| "learning_rate": 3.944723618090452e-05, | |
| "loss": 0.8191, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.23849056603773586, | |
| "grad_norm": 0.4897114594610496, | |
| "learning_rate": 3.969849246231156e-05, | |
| "loss": 0.7659, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.5108095802087971, | |
| "learning_rate": 3.9949748743718597e-05, | |
| "loss": 0.8281, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.24150943396226415, | |
| "grad_norm": 0.6108421680937045, | |
| "learning_rate": 4.020100502512563e-05, | |
| "loss": 0.8847, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.24301886792452831, | |
| "grad_norm": 0.5616258226397572, | |
| "learning_rate": 4.0452261306532664e-05, | |
| "loss": 0.8054, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.24452830188679245, | |
| "grad_norm": 0.45309534243186583, | |
| "learning_rate": 4.07035175879397e-05, | |
| "loss": 0.8975, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.2460377358490566, | |
| "grad_norm": 0.5773610924314223, | |
| "learning_rate": 4.095477386934674e-05, | |
| "loss": 0.9968, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.24754716981132074, | |
| "grad_norm": 0.4469837688813638, | |
| "learning_rate": 4.120603015075377e-05, | |
| "loss": 0.8012, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.2490566037735849, | |
| "grad_norm": 2.054275267813225, | |
| "learning_rate": 4.1457286432160806e-05, | |
| "loss": 0.8895, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.25056603773584907, | |
| "grad_norm": 0.5614011880192892, | |
| "learning_rate": 4.170854271356784e-05, | |
| "loss": 0.9091, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.25207547169811323, | |
| "grad_norm": 0.5803759031771593, | |
| "learning_rate": 4.1959798994974874e-05, | |
| "loss": 0.9152, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.25358490566037734, | |
| "grad_norm": 0.6748698350397639, | |
| "learning_rate": 4.2211055276381914e-05, | |
| "loss": 0.8804, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.2550943396226415, | |
| "grad_norm": 0.5901722233952649, | |
| "learning_rate": 4.246231155778895e-05, | |
| "loss": 0.8415, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.25660377358490566, | |
| "grad_norm": 0.6343703843442664, | |
| "learning_rate": 4.271356783919598e-05, | |
| "loss": 0.8981, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.2581132075471698, | |
| "grad_norm": 0.5181992447670495, | |
| "learning_rate": 4.2964824120603016e-05, | |
| "loss": 0.8191, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.259622641509434, | |
| "grad_norm": 0.4542387485161782, | |
| "learning_rate": 4.321608040201005e-05, | |
| "loss": 0.8212, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.2611320754716981, | |
| "grad_norm": 0.5151193905845751, | |
| "learning_rate": 4.346733668341709e-05, | |
| "loss": 0.8486, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.26264150943396225, | |
| "grad_norm": 1.037000010215159, | |
| "learning_rate": 4.3718592964824124e-05, | |
| "loss": 0.813, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2641509433962264, | |
| "grad_norm": 0.4911515809403663, | |
| "learning_rate": 4.396984924623116e-05, | |
| "loss": 0.8018, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.2656603773584906, | |
| "grad_norm": 0.4701828499446655, | |
| "learning_rate": 4.42211055276382e-05, | |
| "loss": 0.8715, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.26716981132075474, | |
| "grad_norm": 0.4950297728241695, | |
| "learning_rate": 4.4472361809045225e-05, | |
| "loss": 0.8019, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.26867924528301884, | |
| "grad_norm": 0.5589767820377305, | |
| "learning_rate": 4.4723618090452266e-05, | |
| "loss": 0.9524, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.270188679245283, | |
| "grad_norm": 0.571633016201735, | |
| "learning_rate": 4.49748743718593e-05, | |
| "loss": 0.8792, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.27169811320754716, | |
| "grad_norm": 0.476203248798549, | |
| "learning_rate": 4.522613065326633e-05, | |
| "loss": 0.8768, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2732075471698113, | |
| "grad_norm": 0.48088169221041444, | |
| "learning_rate": 4.5477386934673374e-05, | |
| "loss": 0.7918, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.2747169811320755, | |
| "grad_norm": 0.5571799591230107, | |
| "learning_rate": 4.57286432160804e-05, | |
| "loss": 0.8038, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.27622641509433965, | |
| "grad_norm": 0.5527228247900622, | |
| "learning_rate": 4.597989949748744e-05, | |
| "loss": 0.8643, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.27773584905660376, | |
| "grad_norm": 0.5211423771128341, | |
| "learning_rate": 4.6231155778894475e-05, | |
| "loss": 0.8878, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.2792452830188679, | |
| "grad_norm": 0.6572932038519691, | |
| "learning_rate": 4.648241206030151e-05, | |
| "loss": 0.8722, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.2807547169811321, | |
| "grad_norm": 0.7101877955695006, | |
| "learning_rate": 4.673366834170855e-05, | |
| "loss": 0.725, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.28226415094339624, | |
| "grad_norm": 0.49463109890029217, | |
| "learning_rate": 4.6984924623115577e-05, | |
| "loss": 0.8044, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.2837735849056604, | |
| "grad_norm": 0.6761004422971036, | |
| "learning_rate": 4.723618090452262e-05, | |
| "loss": 0.8928, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.2852830188679245, | |
| "grad_norm": 0.7308836814149202, | |
| "learning_rate": 4.748743718592965e-05, | |
| "loss": 0.7881, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.28679245283018867, | |
| "grad_norm": 0.5752705782759212, | |
| "learning_rate": 4.7738693467336685e-05, | |
| "loss": 0.7849, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.28830188679245283, | |
| "grad_norm": 0.6124086779896891, | |
| "learning_rate": 4.7989949748743725e-05, | |
| "loss": 0.8595, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.289811320754717, | |
| "grad_norm": 0.9703844768396788, | |
| "learning_rate": 4.824120603015075e-05, | |
| "loss": 0.8965, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.29132075471698116, | |
| "grad_norm": 0.920385457687424, | |
| "learning_rate": 4.849246231155779e-05, | |
| "loss": 0.8885, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.29283018867924526, | |
| "grad_norm": 0.5451187115485865, | |
| "learning_rate": 4.874371859296483e-05, | |
| "loss": 0.8876, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.2943396226415094, | |
| "grad_norm": 0.8924359703337917, | |
| "learning_rate": 4.899497487437186e-05, | |
| "loss": 0.7749, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.2958490566037736, | |
| "grad_norm": 0.6987574115677371, | |
| "learning_rate": 4.92462311557789e-05, | |
| "loss": 0.8024, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.29735849056603775, | |
| "grad_norm": 1.0879879193939195, | |
| "learning_rate": 4.949748743718593e-05, | |
| "loss": 0.908, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.2988679245283019, | |
| "grad_norm": 0.7248543407154597, | |
| "learning_rate": 4.974874371859297e-05, | |
| "loss": 0.8631, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.300377358490566, | |
| "grad_norm": 0.8236665003667092, | |
| "learning_rate": 5e-05, | |
| "loss": 0.7335, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.3018867924528302, | |
| "grad_norm": 0.6243498075203732, | |
| "learning_rate": 4.9972020145495246e-05, | |
| "loss": 0.8225, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.30339622641509434, | |
| "grad_norm": 0.9010469648278001, | |
| "learning_rate": 4.994404029099049e-05, | |
| "loss": 0.8684, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.3049056603773585, | |
| "grad_norm": 0.8500224224154146, | |
| "learning_rate": 4.991606043648573e-05, | |
| "loss": 0.7941, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.30641509433962266, | |
| "grad_norm": 0.5361097858505313, | |
| "learning_rate": 4.9888080581980976e-05, | |
| "loss": 0.8696, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.30792452830188677, | |
| "grad_norm": 0.9288603951781687, | |
| "learning_rate": 4.986010072747622e-05, | |
| "loss": 0.9553, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.30943396226415093, | |
| "grad_norm": 0.7165689385459199, | |
| "learning_rate": 4.983212087297146e-05, | |
| "loss": 0.7807, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.3109433962264151, | |
| "grad_norm": 0.6324302878103103, | |
| "learning_rate": 4.9804141018466706e-05, | |
| "loss": 0.826, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.31245283018867925, | |
| "grad_norm": 1.0280953874798517, | |
| "learning_rate": 4.977616116396195e-05, | |
| "loss": 0.8772, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.3139622641509434, | |
| "grad_norm": 0.6365774522631158, | |
| "learning_rate": 4.974818130945719e-05, | |
| "loss": 0.7882, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.3154716981132075, | |
| "grad_norm": 0.7866464392944168, | |
| "learning_rate": 4.9720201454952436e-05, | |
| "loss": 0.8591, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.3169811320754717, | |
| "grad_norm": 0.9900793181165777, | |
| "learning_rate": 4.969222160044768e-05, | |
| "loss": 0.9166, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.31849056603773584, | |
| "grad_norm": 0.5027834608849903, | |
| "learning_rate": 4.966424174594292e-05, | |
| "loss": 0.8475, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.8149475773947513, | |
| "learning_rate": 4.9636261891438166e-05, | |
| "loss": 0.8877, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.32150943396226417, | |
| "grad_norm": 0.7936117976140553, | |
| "learning_rate": 4.960828203693341e-05, | |
| "loss": 0.7943, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.3230188679245283, | |
| "grad_norm": 0.6047908551122195, | |
| "learning_rate": 4.958030218242865e-05, | |
| "loss": 0.7487, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.32452830188679244, | |
| "grad_norm": 0.8210359919252752, | |
| "learning_rate": 4.9552322327923896e-05, | |
| "loss": 0.8052, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.3260377358490566, | |
| "grad_norm": 0.5010377931393522, | |
| "learning_rate": 4.952434247341914e-05, | |
| "loss": 0.8029, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.32754716981132076, | |
| "grad_norm": 0.6159395499082359, | |
| "learning_rate": 4.949636261891438e-05, | |
| "loss": 0.7816, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.3290566037735849, | |
| "grad_norm": 0.6021660774660482, | |
| "learning_rate": 4.9468382764409626e-05, | |
| "loss": 0.8034, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.3305660377358491, | |
| "grad_norm": 0.48862257577764984, | |
| "learning_rate": 4.944040290990487e-05, | |
| "loss": 0.7641, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.3320754716981132, | |
| "grad_norm": 0.7291022201204262, | |
| "learning_rate": 4.941242305540011e-05, | |
| "loss": 0.7622, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.33358490566037735, | |
| "grad_norm": 0.5680273830366244, | |
| "learning_rate": 4.9384443200895356e-05, | |
| "loss": 0.8032, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.3350943396226415, | |
| "grad_norm": 0.7388526494706458, | |
| "learning_rate": 4.93564633463906e-05, | |
| "loss": 0.7876, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.3366037735849057, | |
| "grad_norm": 0.8366607526648661, | |
| "learning_rate": 4.932848349188584e-05, | |
| "loss": 0.838, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.33811320754716984, | |
| "grad_norm": 0.5362665065766964, | |
| "learning_rate": 4.930050363738109e-05, | |
| "loss": 0.8709, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.33962264150943394, | |
| "grad_norm": 0.7046748659670787, | |
| "learning_rate": 4.927252378287633e-05, | |
| "loss": 0.8295, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.3411320754716981, | |
| "grad_norm": 0.5709018100667398, | |
| "learning_rate": 4.924454392837157e-05, | |
| "loss": 0.7931, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.34264150943396227, | |
| "grad_norm": 1.5265206975752903, | |
| "learning_rate": 4.9216564073866817e-05, | |
| "loss": 0.7451, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.3441509433962264, | |
| "grad_norm": 0.6830742695717387, | |
| "learning_rate": 4.918858421936206e-05, | |
| "loss": 0.8248, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.3456603773584906, | |
| "grad_norm": 0.7664933367072962, | |
| "learning_rate": 4.91606043648573e-05, | |
| "loss": 0.8626, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.3471698113207547, | |
| "grad_norm": 0.4891691600978454, | |
| "learning_rate": 4.913262451035255e-05, | |
| "loss": 0.8749, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.34867924528301886, | |
| "grad_norm": 1.1932498298234255, | |
| "learning_rate": 4.910464465584779e-05, | |
| "loss": 0.7708, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.350188679245283, | |
| "grad_norm": 0.6049277141915559, | |
| "learning_rate": 4.907666480134303e-05, | |
| "loss": 0.7572, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.3516981132075472, | |
| "grad_norm": 0.6315896027669823, | |
| "learning_rate": 4.9048684946838284e-05, | |
| "loss": 0.8363, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.35320754716981134, | |
| "grad_norm": 0.688434404877305, | |
| "learning_rate": 4.902070509233353e-05, | |
| "loss": 0.8554, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.35471698113207545, | |
| "grad_norm": 0.5422581659132807, | |
| "learning_rate": 4.899272523782877e-05, | |
| "loss": 0.8488, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.3562264150943396, | |
| "grad_norm": 0.591458349973424, | |
| "learning_rate": 4.896474538332401e-05, | |
| "loss": 0.8144, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.35773584905660377, | |
| "grad_norm": 0.5604699909131293, | |
| "learning_rate": 4.893676552881925e-05, | |
| "loss": 0.7237, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.35924528301886793, | |
| "grad_norm": 0.4673344116856225, | |
| "learning_rate": 4.8908785674314494e-05, | |
| "loss": 0.7766, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.3607547169811321, | |
| "grad_norm": 0.5436449382903117, | |
| "learning_rate": 4.888080581980974e-05, | |
| "loss": 0.8197, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.3622641509433962, | |
| "grad_norm": 0.479468226600264, | |
| "learning_rate": 4.885282596530498e-05, | |
| "loss": 0.7939, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.36377358490566036, | |
| "grad_norm": 0.6602471005804883, | |
| "learning_rate": 4.8824846110800224e-05, | |
| "loss": 0.8495, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.3652830188679245, | |
| "grad_norm": 0.4571917624290203, | |
| "learning_rate": 4.879686625629547e-05, | |
| "loss": 0.7913, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.3667924528301887, | |
| "grad_norm": 0.6232210769659097, | |
| "learning_rate": 4.876888640179072e-05, | |
| "loss": 0.7552, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.36830188679245285, | |
| "grad_norm": 0.5204436625742763, | |
| "learning_rate": 4.874090654728596e-05, | |
| "loss": 0.7701, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.36981132075471695, | |
| "grad_norm": 0.6612096193726886, | |
| "learning_rate": 4.8712926692781204e-05, | |
| "loss": 0.7847, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.3713207547169811, | |
| "grad_norm": 0.45379455740824437, | |
| "learning_rate": 4.868494683827644e-05, | |
| "loss": 0.8053, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.3728301886792453, | |
| "grad_norm": 0.5973811927067406, | |
| "learning_rate": 4.8656966983771684e-05, | |
| "loss": 0.8246, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.37433962264150944, | |
| "grad_norm": 0.5220120154434879, | |
| "learning_rate": 4.862898712926693e-05, | |
| "loss": 0.8486, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.3758490566037736, | |
| "grad_norm": 0.5431811554175676, | |
| "learning_rate": 4.860100727476217e-05, | |
| "loss": 0.8141, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.37735849056603776, | |
| "grad_norm": 0.5029866404820176, | |
| "learning_rate": 4.8573027420257414e-05, | |
| "loss": 0.7952, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.37886792452830187, | |
| "grad_norm": 0.5898909870291161, | |
| "learning_rate": 4.854504756575266e-05, | |
| "loss": 0.8547, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.38037735849056603, | |
| "grad_norm": 0.6238187627468014, | |
| "learning_rate": 4.851706771124791e-05, | |
| "loss": 0.7726, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.3818867924528302, | |
| "grad_norm": 0.42756782085548817, | |
| "learning_rate": 4.848908785674315e-05, | |
| "loss": 0.7988, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.38339622641509435, | |
| "grad_norm": 0.5943288699361647, | |
| "learning_rate": 4.8461108002238394e-05, | |
| "loss": 0.8483, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.3849056603773585, | |
| "grad_norm": 0.44627827351325955, | |
| "learning_rate": 4.843312814773364e-05, | |
| "loss": 0.7907, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.3864150943396226, | |
| "grad_norm": 0.6245548346580109, | |
| "learning_rate": 4.840514829322888e-05, | |
| "loss": 0.7789, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.3879245283018868, | |
| "grad_norm": 0.5052966445102851, | |
| "learning_rate": 4.837716843872412e-05, | |
| "loss": 0.8078, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.38943396226415095, | |
| "grad_norm": 0.5086194272663704, | |
| "learning_rate": 4.834918858421936e-05, | |
| "loss": 0.797, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.3909433962264151, | |
| "grad_norm": 0.4917833870150589, | |
| "learning_rate": 4.8321208729714604e-05, | |
| "loss": 0.7691, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.39245283018867927, | |
| "grad_norm": 0.5338995503228233, | |
| "learning_rate": 4.829322887520985e-05, | |
| "loss": 0.7981, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.3939622641509434, | |
| "grad_norm": 0.5412599367495976, | |
| "learning_rate": 4.82652490207051e-05, | |
| "loss": 0.8443, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.39547169811320754, | |
| "grad_norm": 0.580624564144757, | |
| "learning_rate": 4.823726916620034e-05, | |
| "loss": 0.7907, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.3969811320754717, | |
| "grad_norm": 0.48369587610402004, | |
| "learning_rate": 4.8209289311695584e-05, | |
| "loss": 0.794, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.39849056603773586, | |
| "grad_norm": 0.49030096209333235, | |
| "learning_rate": 4.818130945719083e-05, | |
| "loss": 0.7584, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.5061198889128261, | |
| "learning_rate": 4.815332960268607e-05, | |
| "loss": 0.7352, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.40150943396226413, | |
| "grad_norm": 0.44245029843710665, | |
| "learning_rate": 4.8125349748181314e-05, | |
| "loss": 0.7376, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.4030188679245283, | |
| "grad_norm": 0.5564346808520174, | |
| "learning_rate": 4.809736989367655e-05, | |
| "loss": 0.7939, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.40452830188679245, | |
| "grad_norm": 0.4231713875112469, | |
| "learning_rate": 4.8069390039171794e-05, | |
| "loss": 0.7596, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.4060377358490566, | |
| "grad_norm": 0.5072296368778142, | |
| "learning_rate": 4.804141018466704e-05, | |
| "loss": 0.9397, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.4075471698113208, | |
| "grad_norm": 1.3091372981391216, | |
| "learning_rate": 4.801343033016228e-05, | |
| "loss": 0.7321, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.4090566037735849, | |
| "grad_norm": 0.7068197146851543, | |
| "learning_rate": 4.798545047565753e-05, | |
| "loss": 0.8049, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.41056603773584904, | |
| "grad_norm": 0.39121782749476086, | |
| "learning_rate": 4.7957470621152775e-05, | |
| "loss": 0.707, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.4120754716981132, | |
| "grad_norm": 0.6092417359403263, | |
| "learning_rate": 4.792949076664802e-05, | |
| "loss": 0.7187, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.41358490566037737, | |
| "grad_norm": 0.44996301987627735, | |
| "learning_rate": 4.790151091214326e-05, | |
| "loss": 0.8322, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.41509433962264153, | |
| "grad_norm": 0.6626369105932329, | |
| "learning_rate": 4.7873531057638505e-05, | |
| "loss": 0.7857, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.41660377358490563, | |
| "grad_norm": 0.4967773382857501, | |
| "learning_rate": 4.784555120313375e-05, | |
| "loss": 0.746, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.4181132075471698, | |
| "grad_norm": 0.5435211685588449, | |
| "learning_rate": 4.7817571348628985e-05, | |
| "loss": 0.7965, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.41962264150943396, | |
| "grad_norm": 0.7022029449146439, | |
| "learning_rate": 4.778959149412423e-05, | |
| "loss": 0.8653, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.4211320754716981, | |
| "grad_norm": 0.45601467205881, | |
| "learning_rate": 4.776161163961947e-05, | |
| "loss": 0.777, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.4226415094339623, | |
| "grad_norm": 0.6644957716637454, | |
| "learning_rate": 4.773363178511472e-05, | |
| "loss": 0.8214, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4241509433962264, | |
| "grad_norm": 0.6246544429191916, | |
| "learning_rate": 4.7705651930609965e-05, | |
| "loss": 0.8785, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.42566037735849055, | |
| "grad_norm": 0.5291406456848594, | |
| "learning_rate": 4.767767207610521e-05, | |
| "loss": 0.7691, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.4271698113207547, | |
| "grad_norm": 0.7456610522310195, | |
| "learning_rate": 4.764969222160045e-05, | |
| "loss": 0.8264, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.4286792452830189, | |
| "grad_norm": 0.6353984731391955, | |
| "learning_rate": 4.7621712367095695e-05, | |
| "loss": 0.7689, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.43018867924528303, | |
| "grad_norm": 0.5508771074957549, | |
| "learning_rate": 4.759373251259094e-05, | |
| "loss": 0.803, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.4316981132075472, | |
| "grad_norm": 0.6135542045293, | |
| "learning_rate": 4.756575265808618e-05, | |
| "loss": 0.7338, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.4332075471698113, | |
| "grad_norm": 0.5030983850050345, | |
| "learning_rate": 4.7537772803581425e-05, | |
| "loss": 0.818, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.43471698113207546, | |
| "grad_norm": 0.7052367727423026, | |
| "learning_rate": 4.750979294907666e-05, | |
| "loss": 0.8135, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.4362264150943396, | |
| "grad_norm": 0.533554107770091, | |
| "learning_rate": 4.748181309457191e-05, | |
| "loss": 0.9047, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.4377358490566038, | |
| "grad_norm": 0.881159613978913, | |
| "learning_rate": 4.7453833240067155e-05, | |
| "loss": 0.884, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.43924528301886795, | |
| "grad_norm": 0.7306080850175966, | |
| "learning_rate": 4.74258533855624e-05, | |
| "loss": 0.8131, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.44075471698113206, | |
| "grad_norm": 0.5044741998175036, | |
| "learning_rate": 4.739787353105764e-05, | |
| "loss": 0.7513, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.4422641509433962, | |
| "grad_norm": 0.7168412587080375, | |
| "learning_rate": 4.7369893676552885e-05, | |
| "loss": 0.7788, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.4437735849056604, | |
| "grad_norm": 0.4558320123761158, | |
| "learning_rate": 4.734191382204813e-05, | |
| "loss": 0.7893, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.44528301886792454, | |
| "grad_norm": 0.7940272573905822, | |
| "learning_rate": 4.731393396754337e-05, | |
| "loss": 0.8386, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.4467924528301887, | |
| "grad_norm": 0.5513668143022887, | |
| "learning_rate": 4.7285954113038615e-05, | |
| "loss": 0.7833, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.4483018867924528, | |
| "grad_norm": 0.7079029126017938, | |
| "learning_rate": 4.725797425853386e-05, | |
| "loss": 0.7196, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.44981132075471697, | |
| "grad_norm": 1.229514968171817, | |
| "learning_rate": 4.72299944040291e-05, | |
| "loss": 0.8989, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.45132075471698113, | |
| "grad_norm": 0.6404714941336582, | |
| "learning_rate": 4.7202014549524345e-05, | |
| "loss": 0.8317, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.4528301886792453, | |
| "grad_norm": 2.4628741313154188, | |
| "learning_rate": 4.717403469501959e-05, | |
| "loss": 0.8618, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.45433962264150946, | |
| "grad_norm": 0.8912363562097001, | |
| "learning_rate": 4.714605484051483e-05, | |
| "loss": 0.8371, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.45584905660377356, | |
| "grad_norm": 0.7676920170222373, | |
| "learning_rate": 4.7118074986010076e-05, | |
| "loss": 0.7677, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.4573584905660377, | |
| "grad_norm": 0.9101103649802816, | |
| "learning_rate": 4.709009513150532e-05, | |
| "loss": 0.7726, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.4588679245283019, | |
| "grad_norm": 0.865381015702614, | |
| "learning_rate": 4.706211527700056e-05, | |
| "loss": 0.77, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.46037735849056605, | |
| "grad_norm": 0.629343619599737, | |
| "learning_rate": 4.7034135422495806e-05, | |
| "loss": 0.7224, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.4618867924528302, | |
| "grad_norm": 0.9800276759621851, | |
| "learning_rate": 4.700615556799105e-05, | |
| "loss": 0.8232, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.4633962264150943, | |
| "grad_norm": 0.5404048701315444, | |
| "learning_rate": 4.697817571348629e-05, | |
| "loss": 0.7897, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.4649056603773585, | |
| "grad_norm": 1.0278186663808309, | |
| "learning_rate": 4.6950195858981536e-05, | |
| "loss": 0.8159, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.46641509433962264, | |
| "grad_norm": 0.5823665222431843, | |
| "learning_rate": 4.692221600447678e-05, | |
| "loss": 0.7906, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.4679245283018868, | |
| "grad_norm": 0.7252680198432144, | |
| "learning_rate": 4.689423614997202e-05, | |
| "loss": 0.764, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.46943396226415096, | |
| "grad_norm": 0.6570038646216206, | |
| "learning_rate": 4.6866256295467266e-05, | |
| "loss": 0.6926, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.47094339622641507, | |
| "grad_norm": 0.5621715975516278, | |
| "learning_rate": 4.683827644096251e-05, | |
| "loss": 0.6961, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.47245283018867923, | |
| "grad_norm": 0.4779432536942869, | |
| "learning_rate": 4.681029658645775e-05, | |
| "loss": 0.7378, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.4739622641509434, | |
| "grad_norm": 0.674459154650339, | |
| "learning_rate": 4.6782316731952996e-05, | |
| "loss": 0.7874, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.47547169811320755, | |
| "grad_norm": 0.5004466668430579, | |
| "learning_rate": 4.675433687744824e-05, | |
| "loss": 0.7406, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.4769811320754717, | |
| "grad_norm": 0.5395088155154139, | |
| "learning_rate": 4.672635702294348e-05, | |
| "loss": 0.7362, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.4784905660377359, | |
| "grad_norm": 0.6418834874570026, | |
| "learning_rate": 4.6698377168438726e-05, | |
| "loss": 0.8157, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.4797662965902652, | |
| "learning_rate": 4.667039731393397e-05, | |
| "loss": 0.7916, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.48150943396226414, | |
| "grad_norm": 0.6366002378504793, | |
| "learning_rate": 4.664241745942921e-05, | |
| "loss": 0.8928, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.4830188679245283, | |
| "grad_norm": 0.7131620418095839, | |
| "learning_rate": 4.6614437604924456e-05, | |
| "loss": 0.7556, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.48452830188679247, | |
| "grad_norm": 0.5851313011583172, | |
| "learning_rate": 4.65864577504197e-05, | |
| "loss": 0.8102, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.48603773584905663, | |
| "grad_norm": 0.5129745799858986, | |
| "learning_rate": 4.655847789591494e-05, | |
| "loss": 0.7388, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.48754716981132074, | |
| "grad_norm": 0.48227342480016644, | |
| "learning_rate": 4.6530498041410186e-05, | |
| "loss": 0.7968, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.4890566037735849, | |
| "grad_norm": 0.5765939446109118, | |
| "learning_rate": 4.650251818690543e-05, | |
| "loss": 0.8276, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.49056603773584906, | |
| "grad_norm": 0.45210550750524087, | |
| "learning_rate": 4.647453833240067e-05, | |
| "loss": 0.7838, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.4920754716981132, | |
| "grad_norm": 0.5235976941093462, | |
| "learning_rate": 4.6446558477895916e-05, | |
| "loss": 0.8524, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.4935849056603774, | |
| "grad_norm": 0.948045987049377, | |
| "learning_rate": 4.641857862339116e-05, | |
| "loss": 0.783, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.4950943396226415, | |
| "grad_norm": 0.8405622911075784, | |
| "learning_rate": 4.63905987688864e-05, | |
| "loss": 0.7448, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.49660377358490565, | |
| "grad_norm": 0.5163025902946453, | |
| "learning_rate": 4.6362618914381646e-05, | |
| "loss": 0.8237, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.4981132075471698, | |
| "grad_norm": 0.7136811122203169, | |
| "learning_rate": 4.633463905987689e-05, | |
| "loss": 0.8279, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.499622641509434, | |
| "grad_norm": 0.5478782108688454, | |
| "learning_rate": 4.630665920537213e-05, | |
| "loss": 0.7627, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.5011320754716981, | |
| "grad_norm": 0.5191383894592315, | |
| "learning_rate": 4.6278679350867376e-05, | |
| "loss": 0.7477, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.5026415094339622, | |
| "grad_norm": 0.7132287751115131, | |
| "learning_rate": 4.625069949636262e-05, | |
| "loss": 0.715, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.5041509433962265, | |
| "grad_norm": 0.5596228616176007, | |
| "learning_rate": 4.622271964185786e-05, | |
| "loss": 0.8015, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.5056603773584906, | |
| "grad_norm": 0.44498739497428585, | |
| "learning_rate": 4.6194739787353107e-05, | |
| "loss": 0.7529, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.5071698113207547, | |
| "grad_norm": 0.5546353422236862, | |
| "learning_rate": 4.616675993284835e-05, | |
| "loss": 0.7459, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.5086792452830189, | |
| "grad_norm": 0.45280156933343746, | |
| "learning_rate": 4.613878007834359e-05, | |
| "loss": 0.8087, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.510188679245283, | |
| "grad_norm": 0.6178648389283045, | |
| "learning_rate": 4.611080022383884e-05, | |
| "loss": 0.8791, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.5116981132075472, | |
| "grad_norm": 0.4595941605540381, | |
| "learning_rate": 4.608282036933409e-05, | |
| "loss": 0.7174, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.5132075471698113, | |
| "grad_norm": 0.5584877878966316, | |
| "learning_rate": 4.605484051482932e-05, | |
| "loss": 0.7559, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.5147169811320754, | |
| "grad_norm": 0.5173754218021847, | |
| "learning_rate": 4.602686066032457e-05, | |
| "loss": 0.7863, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.5162264150943396, | |
| "grad_norm": 0.5740142718980299, | |
| "learning_rate": 4.599888080581981e-05, | |
| "loss": 0.7521, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.5177358490566037, | |
| "grad_norm": 0.48616061117595205, | |
| "learning_rate": 4.5970900951315053e-05, | |
| "loss": 0.7239, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.519245283018868, | |
| "grad_norm": 0.5948531580198754, | |
| "learning_rate": 4.59429210968103e-05, | |
| "loss": 0.7209, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.5207547169811321, | |
| "grad_norm": 0.7290623947785152, | |
| "learning_rate": 4.591494124230554e-05, | |
| "loss": 0.759, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.5222641509433962, | |
| "grad_norm": 0.488056932359024, | |
| "learning_rate": 4.5886961387800783e-05, | |
| "loss": 0.7456, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.5237735849056604, | |
| "grad_norm": 0.47054544364109835, | |
| "learning_rate": 4.5858981533296034e-05, | |
| "loss": 0.7124, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.5252830188679245, | |
| "grad_norm": 0.50177713952636, | |
| "learning_rate": 4.583100167879128e-05, | |
| "loss": 0.7519, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.5267924528301887, | |
| "grad_norm": 0.6140162666917494, | |
| "learning_rate": 4.580302182428652e-05, | |
| "loss": 0.7098, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.5283018867924528, | |
| "grad_norm": 0.4266279539939302, | |
| "learning_rate": 4.577504196978176e-05, | |
| "loss": 0.7665, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5298113207547169, | |
| "grad_norm": 0.9041107079579056, | |
| "learning_rate": 4.5747062115277e-05, | |
| "loss": 0.7677, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.5313207547169811, | |
| "grad_norm": 0.5038590825788706, | |
| "learning_rate": 4.5719082260772244e-05, | |
| "loss": 0.7628, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.5328301886792453, | |
| "grad_norm": 0.44535742550996604, | |
| "learning_rate": 4.569110240626749e-05, | |
| "loss": 0.7568, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.5343396226415095, | |
| "grad_norm": 0.4862471874939931, | |
| "learning_rate": 4.566312255176273e-05, | |
| "loss": 0.7696, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.5358490566037736, | |
| "grad_norm": 0.4386852797735026, | |
| "learning_rate": 4.5635142697257974e-05, | |
| "loss": 0.7495, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.5373584905660377, | |
| "grad_norm": 0.6316124118050598, | |
| "learning_rate": 4.560716284275322e-05, | |
| "loss": 0.8228, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.5388679245283019, | |
| "grad_norm": 0.41211245426770826, | |
| "learning_rate": 4.557918298824847e-05, | |
| "loss": 0.7613, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.540377358490566, | |
| "grad_norm": 0.5868461457556423, | |
| "learning_rate": 4.555120313374371e-05, | |
| "loss": 0.7506, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.5418867924528302, | |
| "grad_norm": 0.4717702823928158, | |
| "learning_rate": 4.5523223279238954e-05, | |
| "loss": 0.8838, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.5433962264150943, | |
| "grad_norm": 0.5003003408121309, | |
| "learning_rate": 4.54952434247342e-05, | |
| "loss": 0.8818, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5449056603773584, | |
| "grad_norm": 0.4907938538260416, | |
| "learning_rate": 4.5467263570229434e-05, | |
| "loss": 0.7319, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.5464150943396227, | |
| "grad_norm": 0.4644540043260514, | |
| "learning_rate": 4.543928371572468e-05, | |
| "loss": 0.735, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.5479245283018868, | |
| "grad_norm": 0.5560732867320977, | |
| "learning_rate": 4.541130386121992e-05, | |
| "loss": 0.8294, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.549433962264151, | |
| "grad_norm": 0.41552200602803446, | |
| "learning_rate": 4.5383324006715164e-05, | |
| "loss": 0.8152, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.5509433962264151, | |
| "grad_norm": 0.539878788115719, | |
| "learning_rate": 4.535534415221041e-05, | |
| "loss": 0.7746, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.5524528301886793, | |
| "grad_norm": 0.4414859813055167, | |
| "learning_rate": 4.532736429770566e-05, | |
| "loss": 0.7332, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.5539622641509434, | |
| "grad_norm": 0.4321477596135969, | |
| "learning_rate": 4.52993844432009e-05, | |
| "loss": 0.7245, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.5554716981132075, | |
| "grad_norm": 1.2570879787382243, | |
| "learning_rate": 4.5271404588696144e-05, | |
| "loss": 0.7191, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.5569811320754717, | |
| "grad_norm": 0.5742427583402032, | |
| "learning_rate": 4.524342473419139e-05, | |
| "loss": 0.8317, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.5584905660377358, | |
| "grad_norm": 0.4741231113436004, | |
| "learning_rate": 4.521544487968663e-05, | |
| "loss": 0.6925, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 0.48896613714320825, | |
| "learning_rate": 4.518746502518187e-05, | |
| "loss": 0.6983, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.5615094339622642, | |
| "grad_norm": 0.42917322921737827, | |
| "learning_rate": 4.515948517067711e-05, | |
| "loss": 0.7469, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.5630188679245283, | |
| "grad_norm": 0.41025766821703724, | |
| "learning_rate": 4.5131505316172354e-05, | |
| "loss": 0.7545, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.5645283018867925, | |
| "grad_norm": 0.5635148674930043, | |
| "learning_rate": 4.51035254616676e-05, | |
| "loss": 0.8145, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.5660377358490566, | |
| "grad_norm": 0.40552510627195454, | |
| "learning_rate": 4.507554560716285e-05, | |
| "loss": 0.7851, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.5675471698113208, | |
| "grad_norm": 0.7307038202791475, | |
| "learning_rate": 4.504756575265809e-05, | |
| "loss": 0.8569, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.5690566037735849, | |
| "grad_norm": 1.0811129556259813, | |
| "learning_rate": 4.5019585898153335e-05, | |
| "loss": 0.9044, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.570566037735849, | |
| "grad_norm": 0.42008950694302577, | |
| "learning_rate": 4.499160604364858e-05, | |
| "loss": 0.8506, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.5720754716981132, | |
| "grad_norm": 0.42648022512202355, | |
| "learning_rate": 4.496362618914382e-05, | |
| "loss": 0.8346, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.5735849056603773, | |
| "grad_norm": 0.45926513207861036, | |
| "learning_rate": 4.4935646334639065e-05, | |
| "loss": 0.8785, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5750943396226416, | |
| "grad_norm": 0.43277141797293067, | |
| "learning_rate": 4.49076664801343e-05, | |
| "loss": 0.7923, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.5766037735849057, | |
| "grad_norm": 0.4171395355778019, | |
| "learning_rate": 4.4879686625629545e-05, | |
| "loss": 0.7133, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.5781132075471698, | |
| "grad_norm": 0.49673115482652136, | |
| "learning_rate": 4.485170677112479e-05, | |
| "loss": 0.7325, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.579622641509434, | |
| "grad_norm": 0.7116762388295127, | |
| "learning_rate": 4.482372691662003e-05, | |
| "loss": 0.7799, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.5811320754716981, | |
| "grad_norm": 0.8174353919858245, | |
| "learning_rate": 4.479574706211528e-05, | |
| "loss": 0.8845, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.5826415094339623, | |
| "grad_norm": 0.8335644956676623, | |
| "learning_rate": 4.4767767207610525e-05, | |
| "loss": 0.7319, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.5841509433962264, | |
| "grad_norm": 0.6876169170804783, | |
| "learning_rate": 4.473978735310577e-05, | |
| "loss": 0.8176, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.5856603773584905, | |
| "grad_norm": 0.5536550742512403, | |
| "learning_rate": 4.471180749860101e-05, | |
| "loss": 0.8086, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.5871698113207547, | |
| "grad_norm": 0.5323019116550912, | |
| "learning_rate": 4.4683827644096255e-05, | |
| "loss": 0.7065, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.5886792452830188, | |
| "grad_norm": 0.5135931602373224, | |
| "learning_rate": 4.46558477895915e-05, | |
| "loss": 0.7253, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5901886792452831, | |
| "grad_norm": 0.4559259275389328, | |
| "learning_rate": 4.462786793508674e-05, | |
| "loss": 0.7999, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.5916981132075472, | |
| "grad_norm": 0.44803810661731486, | |
| "learning_rate": 4.459988808058198e-05, | |
| "loss": 0.8239, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.5932075471698113, | |
| "grad_norm": 0.5069482759856541, | |
| "learning_rate": 4.457190822607722e-05, | |
| "loss": 0.8289, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.5947169811320755, | |
| "grad_norm": 0.5671863335272355, | |
| "learning_rate": 4.454392837157247e-05, | |
| "loss": 0.8187, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.5962264150943396, | |
| "grad_norm": 0.4606809083404723, | |
| "learning_rate": 4.4515948517067715e-05, | |
| "loss": 0.8192, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.5977358490566038, | |
| "grad_norm": 0.38018446697126707, | |
| "learning_rate": 4.448796866256296e-05, | |
| "loss": 0.7691, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.5992452830188679, | |
| "grad_norm": 0.4323225254781683, | |
| "learning_rate": 4.44599888080582e-05, | |
| "loss": 0.8939, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.600754716981132, | |
| "grad_norm": 0.428772129920215, | |
| "learning_rate": 4.4432008953553445e-05, | |
| "loss": 0.7648, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.6022641509433962, | |
| "grad_norm": 0.4106799432786821, | |
| "learning_rate": 4.440402909904869e-05, | |
| "loss": 0.7747, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.6037735849056604, | |
| "grad_norm": 0.9201724559585969, | |
| "learning_rate": 4.437604924454393e-05, | |
| "loss": 0.7752, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6052830188679246, | |
| "grad_norm": 0.63142220777098, | |
| "learning_rate": 4.4348069390039175e-05, | |
| "loss": 0.7367, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.6067924528301887, | |
| "grad_norm": 0.5430276304717978, | |
| "learning_rate": 4.432008953553441e-05, | |
| "loss": 0.804, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.6083018867924528, | |
| "grad_norm": 0.439182439759478, | |
| "learning_rate": 4.429210968102966e-05, | |
| "loss": 0.8251, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.609811320754717, | |
| "grad_norm": 0.5486244365234464, | |
| "learning_rate": 4.4264129826524905e-05, | |
| "loss": 0.7075, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.6113207547169811, | |
| "grad_norm": 0.5277673001485089, | |
| "learning_rate": 4.423614997202015e-05, | |
| "loss": 0.7607, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.6128301886792453, | |
| "grad_norm": 0.5256998794976435, | |
| "learning_rate": 4.420817011751539e-05, | |
| "loss": 0.7948, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.6143396226415094, | |
| "grad_norm": 0.5122728349415228, | |
| "learning_rate": 4.4180190263010635e-05, | |
| "loss": 0.8268, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.6158490566037735, | |
| "grad_norm": 0.5089282410409556, | |
| "learning_rate": 4.415221040850588e-05, | |
| "loss": 0.7666, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.6173584905660378, | |
| "grad_norm": 1.4029487791422663, | |
| "learning_rate": 4.412423055400112e-05, | |
| "loss": 0.7687, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.6188679245283019, | |
| "grad_norm": 1.0081149280553756, | |
| "learning_rate": 4.4096250699496365e-05, | |
| "loss": 0.7524, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.6203773584905661, | |
| "grad_norm": 0.9006929253576336, | |
| "learning_rate": 4.406827084499161e-05, | |
| "loss": 0.8247, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.6218867924528302, | |
| "grad_norm": 0.8411745640797793, | |
| "learning_rate": 4.4040290990486845e-05, | |
| "loss": 0.8105, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.6233962264150943, | |
| "grad_norm": 0.7294710313107107, | |
| "learning_rate": 4.4012311135982096e-05, | |
| "loss": 0.748, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.6249056603773585, | |
| "grad_norm": 4.837670218659803, | |
| "learning_rate": 4.398433128147734e-05, | |
| "loss": 0.8315, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.6264150943396226, | |
| "grad_norm": 0.7291575178965143, | |
| "learning_rate": 4.395635142697258e-05, | |
| "loss": 0.7576, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.6279245283018868, | |
| "grad_norm": 0.67449080523453, | |
| "learning_rate": 4.3928371572467826e-05, | |
| "loss": 0.7537, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.6294339622641509, | |
| "grad_norm": 0.5951820027076258, | |
| "learning_rate": 4.390039171796307e-05, | |
| "loss": 0.7073, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.630943396226415, | |
| "grad_norm": 0.7759167017515514, | |
| "learning_rate": 4.387241186345831e-05, | |
| "loss": 0.7775, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.6324528301886793, | |
| "grad_norm": 0.65776925709979, | |
| "learning_rate": 4.3844432008953556e-05, | |
| "loss": 0.7754, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.6339622641509434, | |
| "grad_norm": 0.783091467326888, | |
| "learning_rate": 4.38164521544488e-05, | |
| "loss": 0.7448, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6354716981132076, | |
| "grad_norm": 0.6111140171989418, | |
| "learning_rate": 4.378847229994404e-05, | |
| "loss": 0.7472, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.6369811320754717, | |
| "grad_norm": 0.6125601796793868, | |
| "learning_rate": 4.3760492445439286e-05, | |
| "loss": 0.7546, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.6384905660377358, | |
| "grad_norm": 0.5915644550948223, | |
| "learning_rate": 4.373251259093453e-05, | |
| "loss": 0.8102, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 4.073736000326432, | |
| "learning_rate": 4.370453273642977e-05, | |
| "loss": 0.7923, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.6415094339622641, | |
| "grad_norm": 0.8436199611615184, | |
| "learning_rate": 4.3676552881925016e-05, | |
| "loss": 0.8023, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.6430188679245283, | |
| "grad_norm": 0.5606909686356082, | |
| "learning_rate": 4.364857302742026e-05, | |
| "loss": 0.7346, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.6445283018867924, | |
| "grad_norm": 0.7771075034666999, | |
| "learning_rate": 4.36205931729155e-05, | |
| "loss": 0.6807, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.6460377358490565, | |
| "grad_norm": 0.5192219758704827, | |
| "learning_rate": 4.3592613318410746e-05, | |
| "loss": 0.7678, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.6475471698113208, | |
| "grad_norm": 0.6083104527276569, | |
| "learning_rate": 4.356463346390599e-05, | |
| "loss": 0.7552, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.6490566037735849, | |
| "grad_norm": 0.601019714720564, | |
| "learning_rate": 4.353665360940123e-05, | |
| "loss": 0.8074, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.6505660377358491, | |
| "grad_norm": 0.6089857562150989, | |
| "learning_rate": 4.3508673754896476e-05, | |
| "loss": 0.7508, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.6520754716981132, | |
| "grad_norm": 0.6249210482138133, | |
| "learning_rate": 4.348069390039172e-05, | |
| "loss": 0.7558, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.6535849056603774, | |
| "grad_norm": 2.537695860786402, | |
| "learning_rate": 4.345271404588696e-05, | |
| "loss": 0.817, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.6550943396226415, | |
| "grad_norm": 0.9217891149931049, | |
| "learning_rate": 4.3424734191382206e-05, | |
| "loss": 0.7971, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.6566037735849056, | |
| "grad_norm": 0.5519943971261175, | |
| "learning_rate": 4.339675433687745e-05, | |
| "loss": 0.8088, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.6581132075471698, | |
| "grad_norm": 0.7425761436709358, | |
| "learning_rate": 4.336877448237269e-05, | |
| "loss": 0.756, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.659622641509434, | |
| "grad_norm": 0.6942699916554731, | |
| "learning_rate": 4.3340794627867936e-05, | |
| "loss": 0.8032, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.6611320754716982, | |
| "grad_norm": 0.5334615246424551, | |
| "learning_rate": 4.331281477336318e-05, | |
| "loss": 0.7998, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.6626415094339623, | |
| "grad_norm": 0.8292482053092843, | |
| "learning_rate": 4.328483491885842e-05, | |
| "loss": 0.7431, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.6641509433962264, | |
| "grad_norm": 0.4530926505666787, | |
| "learning_rate": 4.3256855064353666e-05, | |
| "loss": 0.7596, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.6656603773584906, | |
| "grad_norm": 0.6624652358047928, | |
| "learning_rate": 4.322887520984891e-05, | |
| "loss": 0.7712, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.6671698113207547, | |
| "grad_norm": 0.5064411908735837, | |
| "learning_rate": 4.320089535534415e-05, | |
| "loss": 0.695, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.6686792452830189, | |
| "grad_norm": 0.5377369847550588, | |
| "learning_rate": 4.31729155008394e-05, | |
| "loss": 0.756, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.670188679245283, | |
| "grad_norm": 0.4983199069825207, | |
| "learning_rate": 4.314493564633464e-05, | |
| "loss": 0.7807, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.6716981132075471, | |
| "grad_norm": 0.44158119758359315, | |
| "learning_rate": 4.311695579182988e-05, | |
| "loss": 0.7754, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.6732075471698113, | |
| "grad_norm": 0.5596653554139499, | |
| "learning_rate": 4.3088975937325127e-05, | |
| "loss": 0.8195, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.6747169811320755, | |
| "grad_norm": 0.43436077791659994, | |
| "learning_rate": 4.306099608282037e-05, | |
| "loss": 0.7604, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.6762264150943397, | |
| "grad_norm": 0.5030810215462936, | |
| "learning_rate": 4.303301622831561e-05, | |
| "loss": 0.6785, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.6777358490566038, | |
| "grad_norm": 0.5482694189131312, | |
| "learning_rate": 4.300503637381086e-05, | |
| "loss": 0.7199, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.6792452830188679, | |
| "grad_norm": 0.5967150137777674, | |
| "learning_rate": 4.29770565193061e-05, | |
| "loss": 0.7983, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.6807547169811321, | |
| "grad_norm": 0.5940918534861581, | |
| "learning_rate": 4.294907666480134e-05, | |
| "loss": 0.7591, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.6822641509433962, | |
| "grad_norm": 0.5427565481245458, | |
| "learning_rate": 4.2921096810296593e-05, | |
| "loss": 0.8326, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.6837735849056604, | |
| "grad_norm": 2.638916003074341, | |
| "learning_rate": 4.289311695579184e-05, | |
| "loss": 0.7776, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.6852830188679245, | |
| "grad_norm": 0.8031629641842678, | |
| "learning_rate": 4.2865137101287073e-05, | |
| "loss": 0.7061, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.6867924528301886, | |
| "grad_norm": 0.4705714421622477, | |
| "learning_rate": 4.283715724678232e-05, | |
| "loss": 0.8115, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.6883018867924529, | |
| "grad_norm": 0.694102381672302, | |
| "learning_rate": 4.280917739227756e-05, | |
| "loss": 0.7455, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.689811320754717, | |
| "grad_norm": 0.7026076420751877, | |
| "learning_rate": 4.2781197537772804e-05, | |
| "loss": 0.7419, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.6913207547169812, | |
| "grad_norm": 0.5435116018268908, | |
| "learning_rate": 4.275321768326805e-05, | |
| "loss": 0.7141, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.6928301886792453, | |
| "grad_norm": 0.7695258633555748, | |
| "learning_rate": 4.272523782876329e-05, | |
| "loss": 0.7067, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.6943396226415094, | |
| "grad_norm": 1.7975424022373265, | |
| "learning_rate": 4.2697257974258534e-05, | |
| "loss": 0.8697, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.6958490566037736, | |
| "grad_norm": 0.6317051374255238, | |
| "learning_rate": 4.2669278119753784e-05, | |
| "loss": 0.7739, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.6973584905660377, | |
| "grad_norm": 0.6351687672902928, | |
| "learning_rate": 4.264129826524903e-05, | |
| "loss": 0.6983, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.6988679245283019, | |
| "grad_norm": 0.6244759183757971, | |
| "learning_rate": 4.261331841074427e-05, | |
| "loss": 0.7031, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.700377358490566, | |
| "grad_norm": 0.5514796773294356, | |
| "learning_rate": 4.258533855623951e-05, | |
| "loss": 0.6884, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.7018867924528301, | |
| "grad_norm": 0.4726505169399417, | |
| "learning_rate": 4.255735870173475e-05, | |
| "loss": 0.7382, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.7033962264150944, | |
| "grad_norm": 0.6739904189805587, | |
| "learning_rate": 4.2529378847229994e-05, | |
| "loss": 0.8122, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.7049056603773585, | |
| "grad_norm": 0.4821838176068059, | |
| "learning_rate": 4.250139899272524e-05, | |
| "loss": 0.8558, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.7064150943396227, | |
| "grad_norm": 0.44367485643876686, | |
| "learning_rate": 4.247341913822048e-05, | |
| "loss": 0.7216, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.7079245283018868, | |
| "grad_norm": 0.4529362658881182, | |
| "learning_rate": 4.2445439283715724e-05, | |
| "loss": 0.6532, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.7094339622641509, | |
| "grad_norm": 0.3961737139001265, | |
| "learning_rate": 4.241745942921097e-05, | |
| "loss": 0.7866, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.7109433962264151, | |
| "grad_norm": 0.5258997289167945, | |
| "learning_rate": 4.238947957470622e-05, | |
| "loss": 0.8513, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.7124528301886792, | |
| "grad_norm": 0.40132597205607645, | |
| "learning_rate": 4.236149972020146e-05, | |
| "loss": 0.8006, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.7139622641509434, | |
| "grad_norm": 0.4178549903280029, | |
| "learning_rate": 4.2333519865696704e-05, | |
| "loss": 0.7349, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.7154716981132075, | |
| "grad_norm": 0.6137285870140219, | |
| "learning_rate": 4.230554001119195e-05, | |
| "loss": 0.8465, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.7169811320754716, | |
| "grad_norm": 0.549511124878987, | |
| "learning_rate": 4.2277560156687184e-05, | |
| "loss": 0.6763, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.7184905660377359, | |
| "grad_norm": 0.46406879130022355, | |
| "learning_rate": 4.224958030218243e-05, | |
| "loss": 0.7497, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.4888047862828803, | |
| "learning_rate": 4.222160044767767e-05, | |
| "loss": 0.7669, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.7215094339622642, | |
| "grad_norm": 0.49711406759007537, | |
| "learning_rate": 4.2193620593172914e-05, | |
| "loss": 0.8006, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.7230188679245283, | |
| "grad_norm": 0.4176159200268463, | |
| "learning_rate": 4.216564073866816e-05, | |
| "loss": 0.7379, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.7245283018867924, | |
| "grad_norm": 0.5327893586007508, | |
| "learning_rate": 4.213766088416341e-05, | |
| "loss": 0.8138, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.7260377358490566, | |
| "grad_norm": 0.9142602019147515, | |
| "learning_rate": 4.210968102965865e-05, | |
| "loss": 0.7227, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.7275471698113207, | |
| "grad_norm": 0.8778784458357308, | |
| "learning_rate": 4.2081701175153894e-05, | |
| "loss": 0.7579, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.7290566037735849, | |
| "grad_norm": 0.6964388295748577, | |
| "learning_rate": 4.205372132064914e-05, | |
| "loss": 0.712, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.730566037735849, | |
| "grad_norm": 0.43020746781080765, | |
| "learning_rate": 4.202574146614438e-05, | |
| "loss": 0.7171, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.7320754716981132, | |
| "grad_norm": 0.5749225465192066, | |
| "learning_rate": 4.199776161163962e-05, | |
| "loss": 0.7308, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.7335849056603774, | |
| "grad_norm": 0.6045836791728121, | |
| "learning_rate": 4.196978175713486e-05, | |
| "loss": 0.7837, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.7350943396226415, | |
| "grad_norm": 0.853172024259124, | |
| "learning_rate": 4.1941801902630104e-05, | |
| "loss": 0.7823, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.7366037735849057, | |
| "grad_norm": 0.8328941937503014, | |
| "learning_rate": 4.191382204812535e-05, | |
| "loss": 0.7805, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.7381132075471698, | |
| "grad_norm": 0.5856813273142618, | |
| "learning_rate": 4.18858421936206e-05, | |
| "loss": 0.7613, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.7396226415094339, | |
| "grad_norm": 0.7017696574876483, | |
| "learning_rate": 4.185786233911584e-05, | |
| "loss": 0.7977, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.7411320754716981, | |
| "grad_norm": 0.5883283825983562, | |
| "learning_rate": 4.1829882484611085e-05, | |
| "loss": 0.6595, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.7426415094339622, | |
| "grad_norm": 0.9189100296218359, | |
| "learning_rate": 4.180190263010633e-05, | |
| "loss": 0.8225, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.7441509433962264, | |
| "grad_norm": 0.4621273821982856, | |
| "learning_rate": 4.177392277560157e-05, | |
| "loss": 0.7267, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.7456603773584906, | |
| "grad_norm": 0.7263731461661145, | |
| "learning_rate": 4.1745942921096815e-05, | |
| "loss": 0.8351, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.7471698113207547, | |
| "grad_norm": 0.4239681400253427, | |
| "learning_rate": 4.171796306659206e-05, | |
| "loss": 0.8602, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.7486792452830189, | |
| "grad_norm": 0.5587028545439432, | |
| "learning_rate": 4.1689983212087295e-05, | |
| "loss": 0.7781, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.750188679245283, | |
| "grad_norm": 0.43677162709699907, | |
| "learning_rate": 4.166200335758254e-05, | |
| "loss": 0.7885, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.7516981132075472, | |
| "grad_norm": 0.5956328426453763, | |
| "learning_rate": 4.163402350307778e-05, | |
| "loss": 0.7078, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.7532075471698113, | |
| "grad_norm": 0.44802871978378656, | |
| "learning_rate": 4.160604364857303e-05, | |
| "loss": 0.7486, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.7547169811320755, | |
| "grad_norm": 0.40451743130792983, | |
| "learning_rate": 4.1578063794068275e-05, | |
| "loss": 0.7091, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7562264150943396, | |
| "grad_norm": 0.4482261465687673, | |
| "learning_rate": 4.155008393956352e-05, | |
| "loss": 0.708, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.7577358490566037, | |
| "grad_norm": 4.209187428133594, | |
| "learning_rate": 4.152210408505876e-05, | |
| "loss": 0.7711, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.759245283018868, | |
| "grad_norm": 0.6084246843903299, | |
| "learning_rate": 4.1494124230554005e-05, | |
| "loss": 0.7152, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.7607547169811321, | |
| "grad_norm": 0.5301187311477114, | |
| "learning_rate": 4.146614437604925e-05, | |
| "loss": 0.822, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.7622641509433963, | |
| "grad_norm": 0.5775612395962103, | |
| "learning_rate": 4.143816452154449e-05, | |
| "loss": 0.773, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.7637735849056604, | |
| "grad_norm": 0.5169776427958961, | |
| "learning_rate": 4.141018466703973e-05, | |
| "loss": 0.7756, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.7652830188679245, | |
| "grad_norm": 0.6175580792907378, | |
| "learning_rate": 4.138220481253497e-05, | |
| "loss": 0.7714, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.7667924528301887, | |
| "grad_norm": 0.5202456866228283, | |
| "learning_rate": 4.135422495803022e-05, | |
| "loss": 0.7569, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.7683018867924528, | |
| "grad_norm": 0.43230254405968893, | |
| "learning_rate": 4.1326245103525465e-05, | |
| "loss": 0.7701, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.769811320754717, | |
| "grad_norm": 0.6199856475998696, | |
| "learning_rate": 4.129826524902071e-05, | |
| "loss": 0.7365, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.7713207547169811, | |
| "grad_norm": 0.4590557643196903, | |
| "learning_rate": 4.127028539451595e-05, | |
| "loss": 0.7349, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.7728301886792452, | |
| "grad_norm": 0.5947228546936734, | |
| "learning_rate": 4.1242305540011195e-05, | |
| "loss": 0.7292, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.7743396226415095, | |
| "grad_norm": 0.503790040694598, | |
| "learning_rate": 4.121432568550644e-05, | |
| "loss": 0.7439, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.7758490566037736, | |
| "grad_norm": 0.6227639804784848, | |
| "learning_rate": 4.118634583100168e-05, | |
| "loss": 0.7778, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.7773584905660378, | |
| "grad_norm": 0.7695881075574827, | |
| "learning_rate": 4.1158365976496925e-05, | |
| "loss": 0.7474, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.7788679245283019, | |
| "grad_norm": 0.4832882825699558, | |
| "learning_rate": 4.113038612199216e-05, | |
| "loss": 0.7468, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.780377358490566, | |
| "grad_norm": 0.5553008775060718, | |
| "learning_rate": 4.110240626748741e-05, | |
| "loss": 0.6952, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.7818867924528302, | |
| "grad_norm": 0.5887331512376914, | |
| "learning_rate": 4.1074426412982655e-05, | |
| "loss": 0.753, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.7833962264150943, | |
| "grad_norm": 0.44943535047602, | |
| "learning_rate": 4.10464465584779e-05, | |
| "loss": 0.7084, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.7849056603773585, | |
| "grad_norm": 0.6695367567635757, | |
| "learning_rate": 4.101846670397314e-05, | |
| "loss": 0.8359, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.7864150943396226, | |
| "grad_norm": 0.4641289136731032, | |
| "learning_rate": 4.0990486849468386e-05, | |
| "loss": 0.727, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.7879245283018868, | |
| "grad_norm": 0.7406198663798486, | |
| "learning_rate": 4.096250699496363e-05, | |
| "loss": 0.7915, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.789433962264151, | |
| "grad_norm": 0.44176398853870474, | |
| "learning_rate": 4.093452714045887e-05, | |
| "loss": 0.6947, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.7909433962264151, | |
| "grad_norm": 0.48042954012719924, | |
| "learning_rate": 4.0906547285954116e-05, | |
| "loss": 0.7552, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.7924528301886793, | |
| "grad_norm": 0.5842724845306636, | |
| "learning_rate": 4.087856743144936e-05, | |
| "loss": 0.8175, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.7939622641509434, | |
| "grad_norm": 0.4284726371980202, | |
| "learning_rate": 4.08505875769446e-05, | |
| "loss": 0.7494, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.7954716981132075, | |
| "grad_norm": 0.5254149807506523, | |
| "learning_rate": 4.0822607722439846e-05, | |
| "loss": 0.7549, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.7969811320754717, | |
| "grad_norm": 0.48365346339231174, | |
| "learning_rate": 4.079462786793509e-05, | |
| "loss": 0.6465, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.7984905660377358, | |
| "grad_norm": 0.5361901921704368, | |
| "learning_rate": 4.076664801343033e-05, | |
| "loss": 0.7893, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.4450391820564859, | |
| "learning_rate": 4.0738668158925576e-05, | |
| "loss": 0.7596, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.8015094339622642, | |
| "grad_norm": 0.4238114156508658, | |
| "learning_rate": 4.071068830442082e-05, | |
| "loss": 0.7685, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.8030188679245283, | |
| "grad_norm": 0.48625846049027577, | |
| "learning_rate": 4.068270844991606e-05, | |
| "loss": 0.6887, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.8045283018867925, | |
| "grad_norm": 0.4417344646967358, | |
| "learning_rate": 4.0654728595411306e-05, | |
| "loss": 0.8144, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.8060377358490566, | |
| "grad_norm": 0.38616318769298813, | |
| "learning_rate": 4.062674874090655e-05, | |
| "loss": 0.7711, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.8075471698113208, | |
| "grad_norm": 0.4732687688025921, | |
| "learning_rate": 4.059876888640179e-05, | |
| "loss": 0.7262, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.8090566037735849, | |
| "grad_norm": 0.3662632839035369, | |
| "learning_rate": 4.0570789031897036e-05, | |
| "loss": 0.7762, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.810566037735849, | |
| "grad_norm": 0.41660011598030444, | |
| "learning_rate": 4.054280917739228e-05, | |
| "loss": 0.8063, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.8120754716981132, | |
| "grad_norm": 0.5061770396979196, | |
| "learning_rate": 4.051482932288752e-05, | |
| "loss": 0.8055, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.8135849056603773, | |
| "grad_norm": 0.4321594676145302, | |
| "learning_rate": 4.0486849468382766e-05, | |
| "loss": 0.6895, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.8150943396226416, | |
| "grad_norm": 0.4154273903737854, | |
| "learning_rate": 4.045886961387801e-05, | |
| "loss": 0.8246, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.8166037735849057, | |
| "grad_norm": 0.44089593064248783, | |
| "learning_rate": 4.043088975937325e-05, | |
| "loss": 0.6919, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.8181132075471698, | |
| "grad_norm": 0.3931592707406957, | |
| "learning_rate": 4.0402909904868496e-05, | |
| "loss": 0.7232, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.819622641509434, | |
| "grad_norm": 0.3824488662725976, | |
| "learning_rate": 4.037493005036374e-05, | |
| "loss": 0.678, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.8211320754716981, | |
| "grad_norm": 0.47459855214234453, | |
| "learning_rate": 4.034695019585898e-05, | |
| "loss": 0.6916, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.8226415094339623, | |
| "grad_norm": 0.4092162364813964, | |
| "learning_rate": 4.0318970341354226e-05, | |
| "loss": 0.7284, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.8241509433962264, | |
| "grad_norm": 0.40158642010989914, | |
| "learning_rate": 4.029099048684947e-05, | |
| "loss": 0.7858, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.8256603773584905, | |
| "grad_norm": 0.47547007845195716, | |
| "learning_rate": 4.026301063234472e-05, | |
| "loss": 0.7484, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.8271698113207547, | |
| "grad_norm": 0.41353726994458534, | |
| "learning_rate": 4.0235030777839956e-05, | |
| "loss": 0.6895, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.8286792452830188, | |
| "grad_norm": 0.4261847130879816, | |
| "learning_rate": 4.02070509233352e-05, | |
| "loss": 0.7615, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.8301886792452831, | |
| "grad_norm": 0.3713485609487883, | |
| "learning_rate": 4.017907106883044e-05, | |
| "loss": 0.702, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8316981132075472, | |
| "grad_norm": 0.4940211687654344, | |
| "learning_rate": 4.0151091214325686e-05, | |
| "loss": 0.7926, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.8332075471698113, | |
| "grad_norm": 0.4418268573162059, | |
| "learning_rate": 4.012311135982093e-05, | |
| "loss": 0.7477, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.8347169811320755, | |
| "grad_norm": 0.4296889446715816, | |
| "learning_rate": 4.009513150531617e-05, | |
| "loss": 0.7066, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.8362264150943396, | |
| "grad_norm": 0.43481881270259315, | |
| "learning_rate": 4.0067151650811416e-05, | |
| "loss": 0.7104, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.8377358490566038, | |
| "grad_norm": 0.444949300217321, | |
| "learning_rate": 4.003917179630666e-05, | |
| "loss": 0.8165, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.8392452830188679, | |
| "grad_norm": 0.4167038227484225, | |
| "learning_rate": 4.00111919418019e-05, | |
| "loss": 0.7655, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.840754716981132, | |
| "grad_norm": 0.46442021729984984, | |
| "learning_rate": 3.998321208729715e-05, | |
| "loss": 0.8047, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.8422641509433962, | |
| "grad_norm": 0.452338392615249, | |
| "learning_rate": 3.995523223279239e-05, | |
| "loss": 0.6587, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.8437735849056603, | |
| "grad_norm": 0.41153779842520494, | |
| "learning_rate": 3.992725237828763e-05, | |
| "loss": 0.7026, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.8452830188679246, | |
| "grad_norm": 2.6556838900087136, | |
| "learning_rate": 3.989927252378288e-05, | |
| "loss": 0.7276, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.8467924528301887, | |
| "grad_norm": 0.4051057742589149, | |
| "learning_rate": 3.987129266927812e-05, | |
| "loss": 0.7598, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.8483018867924528, | |
| "grad_norm": 0.4783860215225303, | |
| "learning_rate": 3.9843312814773363e-05, | |
| "loss": 0.7517, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.849811320754717, | |
| "grad_norm": 0.379963266361194, | |
| "learning_rate": 3.981533296026861e-05, | |
| "loss": 0.7294, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.8513207547169811, | |
| "grad_norm": 0.3995738900162358, | |
| "learning_rate": 3.978735310576385e-05, | |
| "loss": 0.7339, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.8528301886792453, | |
| "grad_norm": 0.4429737030646909, | |
| "learning_rate": 3.9759373251259093e-05, | |
| "loss": 0.7687, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.8543396226415094, | |
| "grad_norm": 0.35025220758613634, | |
| "learning_rate": 3.9731393396754344e-05, | |
| "loss": 0.7217, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.8558490566037736, | |
| "grad_norm": 0.5313539267701104, | |
| "learning_rate": 3.970341354224959e-05, | |
| "loss": 0.6841, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.8573584905660377, | |
| "grad_norm": 0.44159011121842684, | |
| "learning_rate": 3.9675433687744824e-05, | |
| "loss": 0.7777, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.8588679245283019, | |
| "grad_norm": 0.5032221283954244, | |
| "learning_rate": 3.964745383324007e-05, | |
| "loss": 0.7903, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.8603773584905661, | |
| "grad_norm": 0.5209160538052351, | |
| "learning_rate": 3.961947397873531e-05, | |
| "loss": 0.7832, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.8618867924528302, | |
| "grad_norm": 0.4117034313241634, | |
| "learning_rate": 3.9591494124230554e-05, | |
| "loss": 0.7483, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.8633962264150944, | |
| "grad_norm": 0.6056396156171621, | |
| "learning_rate": 3.95635142697258e-05, | |
| "loss": 0.7845, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.8649056603773585, | |
| "grad_norm": 0.4032653916231986, | |
| "learning_rate": 3.953553441522104e-05, | |
| "loss": 0.7263, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.8664150943396226, | |
| "grad_norm": 0.45120438719371664, | |
| "learning_rate": 3.9507554560716284e-05, | |
| "loss": 0.6961, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.8679245283018868, | |
| "grad_norm": 0.4944595803156947, | |
| "learning_rate": 3.9479574706211534e-05, | |
| "loss": 0.7188, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.8694339622641509, | |
| "grad_norm": 0.46427671257440184, | |
| "learning_rate": 3.945159485170678e-05, | |
| "loss": 0.798, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.8709433962264151, | |
| "grad_norm": 0.6434219625626204, | |
| "learning_rate": 3.942361499720202e-05, | |
| "loss": 0.7815, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.8724528301886793, | |
| "grad_norm": 0.4620657198048205, | |
| "learning_rate": 3.9395635142697264e-05, | |
| "loss": 0.7327, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.8739622641509434, | |
| "grad_norm": 0.40262252638288304, | |
| "learning_rate": 3.93676552881925e-05, | |
| "loss": 0.7607, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.8754716981132076, | |
| "grad_norm": 0.45983570698328446, | |
| "learning_rate": 3.9339675433687744e-05, | |
| "loss": 0.7012, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.8769811320754717, | |
| "grad_norm": 0.40363702713690075, | |
| "learning_rate": 3.931169557918299e-05, | |
| "loss": 0.7372, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.8784905660377359, | |
| "grad_norm": 0.3907346317291026, | |
| "learning_rate": 3.928371572467823e-05, | |
| "loss": 0.7761, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.3990895685405329, | |
| "learning_rate": 3.9255735870173474e-05, | |
| "loss": 0.726, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.8815094339622641, | |
| "grad_norm": 0.3729973185332231, | |
| "learning_rate": 3.922775601566872e-05, | |
| "loss": 0.7677, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.8830188679245283, | |
| "grad_norm": 0.4089477576652253, | |
| "learning_rate": 3.919977616116397e-05, | |
| "loss": 0.7048, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.8845283018867924, | |
| "grad_norm": 0.3945532227748924, | |
| "learning_rate": 3.917179630665921e-05, | |
| "loss": 0.7714, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.8860377358490567, | |
| "grad_norm": 0.5140900259958995, | |
| "learning_rate": 3.9143816452154454e-05, | |
| "loss": 0.7382, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.8875471698113208, | |
| "grad_norm": 0.38891901989138217, | |
| "learning_rate": 3.91158365976497e-05, | |
| "loss": 0.8208, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.8890566037735849, | |
| "grad_norm": 0.49569695718152956, | |
| "learning_rate": 3.9087856743144934e-05, | |
| "loss": 0.7436, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.8905660377358491, | |
| "grad_norm": 0.43285829098870593, | |
| "learning_rate": 3.905987688864018e-05, | |
| "loss": 0.7365, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.8920754716981132, | |
| "grad_norm": 0.36885687866534356, | |
| "learning_rate": 3.903189703413542e-05, | |
| "loss": 0.7022, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.8935849056603774, | |
| "grad_norm": 0.43241043983514127, | |
| "learning_rate": 3.9003917179630664e-05, | |
| "loss": 0.8662, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.8950943396226415, | |
| "grad_norm": 0.3649761816139934, | |
| "learning_rate": 3.897593732512591e-05, | |
| "loss": 0.7368, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.8966037735849056, | |
| "grad_norm": 0.6268302532119931, | |
| "learning_rate": 3.894795747062116e-05, | |
| "loss": 0.7893, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.8981132075471698, | |
| "grad_norm": 0.3667724539461183, | |
| "learning_rate": 3.89199776161164e-05, | |
| "loss": 0.7488, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.8996226415094339, | |
| "grad_norm": 0.48371976093317937, | |
| "learning_rate": 3.8891997761611645e-05, | |
| "loss": 0.7535, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.9011320754716982, | |
| "grad_norm": 0.4239299088649613, | |
| "learning_rate": 3.886401790710689e-05, | |
| "loss": 0.7663, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.9026415094339623, | |
| "grad_norm": 0.3779199263406811, | |
| "learning_rate": 3.883603805260213e-05, | |
| "loss": 0.7457, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.9041509433962264, | |
| "grad_norm": 0.9168684585034359, | |
| "learning_rate": 3.880805819809737e-05, | |
| "loss": 0.7519, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.9056603773584906, | |
| "grad_norm": 0.3661469557119822, | |
| "learning_rate": 3.878007834359261e-05, | |
| "loss": 0.7733, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.9071698113207547, | |
| "grad_norm": 3.9823358965275806, | |
| "learning_rate": 3.8752098489087855e-05, | |
| "loss": 0.9001, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.9086792452830189, | |
| "grad_norm": 0.7852186633819904, | |
| "learning_rate": 3.87241186345831e-05, | |
| "loss": 0.7157, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.910188679245283, | |
| "grad_norm": 0.5408079896292302, | |
| "learning_rate": 3.869613878007835e-05, | |
| "loss": 0.7022, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.9116981132075471, | |
| "grad_norm": 0.7834797565859986, | |
| "learning_rate": 3.866815892557359e-05, | |
| "loss": 0.6932, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.9132075471698113, | |
| "grad_norm": 0.5709707609818889, | |
| "learning_rate": 3.8640179071068835e-05, | |
| "loss": 0.7641, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.9147169811320754, | |
| "grad_norm": 0.5337869168215333, | |
| "learning_rate": 3.861219921656408e-05, | |
| "loss": 0.7806, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.9162264150943397, | |
| "grad_norm": 0.45607562967499066, | |
| "learning_rate": 3.858421936205932e-05, | |
| "loss": 0.6848, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.9177358490566038, | |
| "grad_norm": 0.5120296828216754, | |
| "learning_rate": 3.8556239507554565e-05, | |
| "loss": 0.7603, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.9192452830188679, | |
| "grad_norm": 0.4344514448742209, | |
| "learning_rate": 3.852825965304981e-05, | |
| "loss": 0.6694, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.9207547169811321, | |
| "grad_norm": 0.5404883518478442, | |
| "learning_rate": 3.8500279798545045e-05, | |
| "loss": 0.7228, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.9222641509433962, | |
| "grad_norm": 0.4970271679168196, | |
| "learning_rate": 3.847229994404029e-05, | |
| "loss": 0.7797, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.9237735849056604, | |
| "grad_norm": 0.49505737921449006, | |
| "learning_rate": 3.844432008953553e-05, | |
| "loss": 0.7037, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.9252830188679245, | |
| "grad_norm": 0.7739566407768966, | |
| "learning_rate": 3.841634023503078e-05, | |
| "loss": 0.7542, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.9267924528301886, | |
| "grad_norm": 0.4238126871917205, | |
| "learning_rate": 3.8388360380526025e-05, | |
| "loss": 0.7202, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.9283018867924528, | |
| "grad_norm": 0.6151008080921576, | |
| "learning_rate": 3.836038052602127e-05, | |
| "loss": 0.7636, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.929811320754717, | |
| "grad_norm": 0.42881880105092096, | |
| "learning_rate": 3.833240067151651e-05, | |
| "loss": 0.7613, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.9313207547169812, | |
| "grad_norm": 0.5146237853082963, | |
| "learning_rate": 3.8304420817011755e-05, | |
| "loss": 0.715, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.9328301886792453, | |
| "grad_norm": 0.3714673908627866, | |
| "learning_rate": 3.8276440962507e-05, | |
| "loss": 0.745, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.9343396226415094, | |
| "grad_norm": 0.4650679347629378, | |
| "learning_rate": 3.824846110800224e-05, | |
| "loss": 0.7422, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.9358490566037736, | |
| "grad_norm": 1.0749664641896852, | |
| "learning_rate": 3.822048125349748e-05, | |
| "loss": 0.7985, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.9373584905660377, | |
| "grad_norm": 0.40863953645268636, | |
| "learning_rate": 3.819250139899272e-05, | |
| "loss": 0.7385, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.9388679245283019, | |
| "grad_norm": 0.40252351812274834, | |
| "learning_rate": 3.816452154448797e-05, | |
| "loss": 0.7643, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.940377358490566, | |
| "grad_norm": 0.43559572158086535, | |
| "learning_rate": 3.8136541689983215e-05, | |
| "loss": 0.7439, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.9418867924528301, | |
| "grad_norm": 0.9641369155300525, | |
| "learning_rate": 3.810856183547846e-05, | |
| "loss": 0.7718, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.9433962264150944, | |
| "grad_norm": 0.4310013200104187, | |
| "learning_rate": 3.80805819809737e-05, | |
| "loss": 0.7925, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.9449056603773585, | |
| "grad_norm": 0.4194039790316717, | |
| "learning_rate": 3.8052602126468945e-05, | |
| "loss": 0.7287, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.9464150943396227, | |
| "grad_norm": 0.4665354497735913, | |
| "learning_rate": 3.802462227196419e-05, | |
| "loss": 0.6991, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.9479245283018868, | |
| "grad_norm": 0.45730976704052134, | |
| "learning_rate": 3.799664241745943e-05, | |
| "loss": 0.7642, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.9494339622641509, | |
| "grad_norm": 0.3856324295687261, | |
| "learning_rate": 3.7968662562954675e-05, | |
| "loss": 0.6671, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.9509433962264151, | |
| "grad_norm": 0.4107928434108811, | |
| "learning_rate": 3.794068270844992e-05, | |
| "loss": 0.7561, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.9524528301886792, | |
| "grad_norm": 0.44719174208858464, | |
| "learning_rate": 3.791270285394516e-05, | |
| "loss": 0.8235, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.9539622641509434, | |
| "grad_norm": 0.4008367508350329, | |
| "learning_rate": 3.7884722999440406e-05, | |
| "loss": 0.7292, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.9554716981132075, | |
| "grad_norm": 0.4391202885533926, | |
| "learning_rate": 3.785674314493565e-05, | |
| "loss": 0.8191, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.9569811320754718, | |
| "grad_norm": 0.42891554570166274, | |
| "learning_rate": 3.782876329043089e-05, | |
| "loss": 0.8442, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.9584905660377359, | |
| "grad_norm": 0.43000876777383873, | |
| "learning_rate": 3.7800783435926136e-05, | |
| "loss": 0.7543, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.4155832959292569, | |
| "learning_rate": 3.777280358142138e-05, | |
| "loss": 0.729, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.9615094339622642, | |
| "grad_norm": 1.3022894310168054, | |
| "learning_rate": 3.774482372691662e-05, | |
| "loss": 0.7624, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.9630188679245283, | |
| "grad_norm": 0.39110065047725334, | |
| "learning_rate": 3.7716843872411866e-05, | |
| "loss": 0.7788, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.9645283018867925, | |
| "grad_norm": 0.4420204425883805, | |
| "learning_rate": 3.768886401790711e-05, | |
| "loss": 0.8003, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.9660377358490566, | |
| "grad_norm": 0.47846340432718537, | |
| "learning_rate": 3.766088416340235e-05, | |
| "loss": 0.661, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.9675471698113207, | |
| "grad_norm": 0.45386680890224157, | |
| "learning_rate": 3.7632904308897596e-05, | |
| "loss": 0.7336, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.9690566037735849, | |
| "grad_norm": 0.4666290433954947, | |
| "learning_rate": 3.760492445439284e-05, | |
| "loss": 0.8496, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.970566037735849, | |
| "grad_norm": 0.5022567954654142, | |
| "learning_rate": 3.757694459988808e-05, | |
| "loss": 0.7529, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.9720754716981133, | |
| "grad_norm": 0.49014277465359335, | |
| "learning_rate": 3.7548964745383326e-05, | |
| "loss": 0.7722, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.9735849056603774, | |
| "grad_norm": 0.5190484716624026, | |
| "learning_rate": 3.752098489087857e-05, | |
| "loss": 0.723, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.9750943396226415, | |
| "grad_norm": 0.41311399967316736, | |
| "learning_rate": 3.749300503637381e-05, | |
| "loss": 0.7089, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.9766037735849057, | |
| "grad_norm": 0.4396273185284637, | |
| "learning_rate": 3.7465025181869056e-05, | |
| "loss": 0.7921, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.9781132075471698, | |
| "grad_norm": 0.4228832100437926, | |
| "learning_rate": 3.74370453273643e-05, | |
| "loss": 0.7376, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.979622641509434, | |
| "grad_norm": 0.42404331809881607, | |
| "learning_rate": 3.740906547285954e-05, | |
| "loss": 0.8042, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.9811320754716981, | |
| "grad_norm": 0.43384836343190214, | |
| "learning_rate": 3.7381085618354786e-05, | |
| "loss": 0.7457, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.9826415094339622, | |
| "grad_norm": 0.40991552369681833, | |
| "learning_rate": 3.735310576385003e-05, | |
| "loss": 0.7271, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.9841509433962264, | |
| "grad_norm": 0.42720312031263036, | |
| "learning_rate": 3.732512590934527e-05, | |
| "loss": 0.6783, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.9856603773584905, | |
| "grad_norm": 0.3820997114893428, | |
| "learning_rate": 3.7297146054840516e-05, | |
| "loss": 0.741, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.9871698113207548, | |
| "grad_norm": 0.3859225460582329, | |
| "learning_rate": 3.726916620033576e-05, | |
| "loss": 0.7616, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.9886792452830189, | |
| "grad_norm": 0.3184257704684841, | |
| "learning_rate": 3.7241186345831e-05, | |
| "loss": 0.7018, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.990188679245283, | |
| "grad_norm": 5.0525018446915455, | |
| "learning_rate": 3.7213206491326246e-05, | |
| "loss": 0.6822, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.9916981132075472, | |
| "grad_norm": 0.4774160070976838, | |
| "learning_rate": 3.718522663682149e-05, | |
| "loss": 0.759, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.9932075471698113, | |
| "grad_norm": 0.38749071520309747, | |
| "learning_rate": 3.715724678231673e-05, | |
| "loss": 0.681, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.9947169811320755, | |
| "grad_norm": 0.3565441160393527, | |
| "learning_rate": 3.7129266927811976e-05, | |
| "loss": 0.7235, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.9962264150943396, | |
| "grad_norm": 0.3620064697768303, | |
| "learning_rate": 3.710128707330722e-05, | |
| "loss": 0.702, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.9977358490566037, | |
| "grad_norm": 0.3997483511508158, | |
| "learning_rate": 3.707330721880247e-05, | |
| "loss": 0.7241, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.999245283018868, | |
| "grad_norm": 0.9076134480414219, | |
| "learning_rate": 3.7045327364297706e-05, | |
| "loss": 0.7179, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.9076134480414219, | |
| "learning_rate": 3.701734750979295e-05, | |
| "loss": 0.7348, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.001509433962264, | |
| "grad_norm": 0.6627110764246336, | |
| "learning_rate": 3.698936765528819e-05, | |
| "loss": 0.6885, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.0030188679245282, | |
| "grad_norm": 10.266344586969133, | |
| "learning_rate": 3.6961387800783437e-05, | |
| "loss": 1.0522, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.0045283018867925, | |
| "grad_norm": 0.810534888309623, | |
| "learning_rate": 3.693340794627868e-05, | |
| "loss": 0.6195, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.0060377358490566, | |
| "grad_norm": 1.7755771547569201, | |
| "learning_rate": 3.690542809177392e-05, | |
| "loss": 0.6254, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.0075471698113208, | |
| "grad_norm": 0.6892452310322204, | |
| "learning_rate": 3.687744823726917e-05, | |
| "loss": 0.6086, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.0090566037735849, | |
| "grad_norm": 0.5926694715314103, | |
| "learning_rate": 3.684946838276441e-05, | |
| "loss": 0.5926, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.010566037735849, | |
| "grad_norm": 0.41200219649494063, | |
| "learning_rate": 3.682148852825965e-05, | |
| "loss": 0.6277, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.0120754716981133, | |
| "grad_norm": 0.8158461895891851, | |
| "learning_rate": 3.6793508673754903e-05, | |
| "loss": 0.6335, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.0135849056603774, | |
| "grad_norm": 0.601172077942669, | |
| "learning_rate": 3.676552881925014e-05, | |
| "loss": 0.6547, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.0150943396226415, | |
| "grad_norm": 0.4134386101441369, | |
| "learning_rate": 3.6737548964745383e-05, | |
| "loss": 0.6927, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.0166037735849056, | |
| "grad_norm": 0.5120306933334605, | |
| "learning_rate": 3.670956911024063e-05, | |
| "loss": 0.6646, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.0181132075471697, | |
| "grad_norm": 0.5525313287248008, | |
| "learning_rate": 3.668158925573587e-05, | |
| "loss": 0.6283, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.019622641509434, | |
| "grad_norm": 0.42590696095726077, | |
| "learning_rate": 3.6653609401231114e-05, | |
| "loss": 0.7062, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.0211320754716982, | |
| "grad_norm": 0.724226525712548, | |
| "learning_rate": 3.662562954672636e-05, | |
| "loss": 0.6758, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.0226415094339623, | |
| "grad_norm": 0.5674130150381019, | |
| "learning_rate": 3.65976496922216e-05, | |
| "loss": 0.6043, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.0241509433962264, | |
| "grad_norm": 0.4079281128046953, | |
| "learning_rate": 3.6569669837716844e-05, | |
| "loss": 0.6014, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.0256603773584905, | |
| "grad_norm": 0.46953990551509994, | |
| "learning_rate": 3.6541689983212094e-05, | |
| "loss": 0.7177, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.0271698113207548, | |
| "grad_norm": 0.4545817525144365, | |
| "learning_rate": 3.651371012870734e-05, | |
| "loss": 0.6133, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.028679245283019, | |
| "grad_norm": 0.38317634414943963, | |
| "learning_rate": 3.648573027420258e-05, | |
| "loss": 0.5863, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.030188679245283, | |
| "grad_norm": 0.4097662107255479, | |
| "learning_rate": 3.645775041969782e-05, | |
| "loss": 0.6544, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.0316981132075471, | |
| "grad_norm": 0.5667177083373588, | |
| "learning_rate": 3.642977056519306e-05, | |
| "loss": 0.6172, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.0332075471698112, | |
| "grad_norm": 0.3606672570163167, | |
| "learning_rate": 3.6401790710688304e-05, | |
| "loss": 0.6386, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.0347169811320756, | |
| "grad_norm": 0.4911306296070032, | |
| "learning_rate": 3.637381085618355e-05, | |
| "loss": 0.6652, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.0362264150943397, | |
| "grad_norm": 2.4217324647576763, | |
| "learning_rate": 3.634583100167879e-05, | |
| "loss": 0.7402, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.0377358490566038, | |
| "grad_norm": 0.4824187002755876, | |
| "learning_rate": 3.6317851147174034e-05, | |
| "loss": 0.711, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.0392452830188679, | |
| "grad_norm": 0.49721674246607617, | |
| "learning_rate": 3.6289871292669284e-05, | |
| "loss": 0.5908, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.040754716981132, | |
| "grad_norm": 0.3743482427307361, | |
| "learning_rate": 3.626189143816453e-05, | |
| "loss": 0.6319, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.0422641509433963, | |
| "grad_norm": 0.43291507293459125, | |
| "learning_rate": 3.623391158365977e-05, | |
| "loss": 0.6705, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.0437735849056604, | |
| "grad_norm": 0.5208705268592786, | |
| "learning_rate": 3.6205931729155014e-05, | |
| "loss": 0.6039, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.0452830188679245, | |
| "grad_norm": 0.410450044767452, | |
| "learning_rate": 3.617795187465025e-05, | |
| "loss": 0.6525, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.0467924528301886, | |
| "grad_norm": 0.4022445567989694, | |
| "learning_rate": 3.6149972020145494e-05, | |
| "loss": 0.6954, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.0483018867924527, | |
| "grad_norm": 0.47132444327306144, | |
| "learning_rate": 3.612199216564074e-05, | |
| "loss": 0.6521, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.049811320754717, | |
| "grad_norm": 0.3875966766440471, | |
| "learning_rate": 3.609401231113598e-05, | |
| "loss": 0.614, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.0513207547169812, | |
| "grad_norm": 0.451663223959601, | |
| "learning_rate": 3.6066032456631224e-05, | |
| "loss": 0.6445, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.0528301886792453, | |
| "grad_norm": 0.41172392506051253, | |
| "learning_rate": 3.603805260212647e-05, | |
| "loss": 0.6422, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.0543396226415094, | |
| "grad_norm": 0.375540946924188, | |
| "learning_rate": 3.601007274762172e-05, | |
| "loss": 0.652, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.0558490566037735, | |
| "grad_norm": 0.3717387172700223, | |
| "learning_rate": 3.598209289311696e-05, | |
| "loss": 0.6777, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.0573584905660378, | |
| "grad_norm": 0.424782758304706, | |
| "learning_rate": 3.5954113038612204e-05, | |
| "loss": 0.6228, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.058867924528302, | |
| "grad_norm": 0.3391925209354729, | |
| "learning_rate": 3.592613318410745e-05, | |
| "loss": 0.6024, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.060377358490566, | |
| "grad_norm": 0.32914720950015897, | |
| "learning_rate": 3.5898153329602684e-05, | |
| "loss": 0.6457, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.0618867924528301, | |
| "grad_norm": 1.0554743822729729, | |
| "learning_rate": 3.587017347509793e-05, | |
| "loss": 0.5799, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.0633962264150942, | |
| "grad_norm": 0.39339053656170486, | |
| "learning_rate": 3.584219362059317e-05, | |
| "loss": 0.5598, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.0649056603773586, | |
| "grad_norm": 0.40135937465077187, | |
| "learning_rate": 3.5814213766088414e-05, | |
| "loss": 0.6226, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.0664150943396227, | |
| "grad_norm": 0.43288336102917757, | |
| "learning_rate": 3.578623391158366e-05, | |
| "loss": 0.6274, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.0679245283018868, | |
| "grad_norm": 0.4521991854235038, | |
| "learning_rate": 3.575825405707891e-05, | |
| "loss": 0.6483, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.0694339622641509, | |
| "grad_norm": 0.402604567263997, | |
| "learning_rate": 3.573027420257415e-05, | |
| "loss": 0.6309, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.070943396226415, | |
| "grad_norm": 0.46402872137647033, | |
| "learning_rate": 3.5702294348069395e-05, | |
| "loss": 0.639, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.0724528301886793, | |
| "grad_norm": 0.4017030194373752, | |
| "learning_rate": 3.567431449356464e-05, | |
| "loss": 0.6329, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.0739622641509434, | |
| "grad_norm": 0.47618502583548106, | |
| "learning_rate": 3.564633463905988e-05, | |
| "loss": 0.7084, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.0754716981132075, | |
| "grad_norm": 0.41264204371650326, | |
| "learning_rate": 3.5618354784555125e-05, | |
| "loss": 0.6365, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.0769811320754716, | |
| "grad_norm": 0.38772697844532533, | |
| "learning_rate": 3.559037493005036e-05, | |
| "loss": 0.6308, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.0784905660377357, | |
| "grad_norm": 0.46449961773150183, | |
| "learning_rate": 3.5562395075545605e-05, | |
| "loss": 0.6279, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 0.45078141741988764, | |
| "learning_rate": 3.553441522104085e-05, | |
| "loss": 0.6256, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.0815094339622642, | |
| "grad_norm": 0.5161091002718486, | |
| "learning_rate": 3.55064353665361e-05, | |
| "loss": 0.6283, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.0830188679245283, | |
| "grad_norm": 0.3938545925062218, | |
| "learning_rate": 3.547845551203134e-05, | |
| "loss": 0.6029, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.0845283018867924, | |
| "grad_norm": 0.4535260490235984, | |
| "learning_rate": 3.5450475657526585e-05, | |
| "loss": 0.6482, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.0860377358490565, | |
| "grad_norm": 0.3970289231442372, | |
| "learning_rate": 3.542249580302183e-05, | |
| "loss": 0.5763, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.0875471698113208, | |
| "grad_norm": 0.4039865278069702, | |
| "learning_rate": 3.539451594851707e-05, | |
| "loss": 0.6558, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.089056603773585, | |
| "grad_norm": 0.4202122162673237, | |
| "learning_rate": 3.5366536094012315e-05, | |
| "loss": 0.6462, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.090566037735849, | |
| "grad_norm": 0.38039927746822294, | |
| "learning_rate": 3.533855623950756e-05, | |
| "loss": 0.6544, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.0920754716981131, | |
| "grad_norm": 0.40116562127860167, | |
| "learning_rate": 3.5310576385002795e-05, | |
| "loss": 0.6408, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.0935849056603772, | |
| "grad_norm": 0.48128273610391287, | |
| "learning_rate": 3.528259653049804e-05, | |
| "loss": 0.5859, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.0950943396226416, | |
| "grad_norm": 0.42443398500645513, | |
| "learning_rate": 3.525461667599328e-05, | |
| "loss": 0.6272, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.0966037735849057, | |
| "grad_norm": 0.8589027269475118, | |
| "learning_rate": 3.522663682148853e-05, | |
| "loss": 0.6466, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.0981132075471698, | |
| "grad_norm": 1.3107651423337432, | |
| "learning_rate": 3.5198656966983775e-05, | |
| "loss": 0.6948, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.099622641509434, | |
| "grad_norm": 0.46154805702038726, | |
| "learning_rate": 3.517067711247902e-05, | |
| "loss": 0.647, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.101132075471698, | |
| "grad_norm": 0.4215490457807108, | |
| "learning_rate": 3.514269725797426e-05, | |
| "loss": 0.639, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.1026415094339623, | |
| "grad_norm": 0.44910831262776585, | |
| "learning_rate": 3.5114717403469505e-05, | |
| "loss": 0.6416, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.1041509433962264, | |
| "grad_norm": 0.4180279321095757, | |
| "learning_rate": 3.508673754896475e-05, | |
| "loss": 0.687, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.1056603773584905, | |
| "grad_norm": 0.3787657588954383, | |
| "learning_rate": 3.505875769445999e-05, | |
| "loss": 0.6842, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.1071698113207546, | |
| "grad_norm": 0.4577454096053106, | |
| "learning_rate": 3.5030777839955235e-05, | |
| "loss": 0.6741, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.1086792452830188, | |
| "grad_norm": 1.4461079332215183, | |
| "learning_rate": 3.500279798545047e-05, | |
| "loss": 0.6817, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.110188679245283, | |
| "grad_norm": 0.4104193651538478, | |
| "learning_rate": 3.497481813094572e-05, | |
| "loss": 0.6317, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.1116981132075472, | |
| "grad_norm": 0.655866457602048, | |
| "learning_rate": 3.4946838276440965e-05, | |
| "loss": 0.6313, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.1132075471698113, | |
| "grad_norm": 0.4531623276746969, | |
| "learning_rate": 3.491885842193621e-05, | |
| "loss": 0.5892, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.1147169811320754, | |
| "grad_norm": 0.5709351123804889, | |
| "learning_rate": 3.489087856743145e-05, | |
| "loss": 0.5634, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.1162264150943395, | |
| "grad_norm": 0.48674031702120063, | |
| "learning_rate": 3.4862898712926696e-05, | |
| "loss": 0.6801, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.1177358490566038, | |
| "grad_norm": 0.4048608285674579, | |
| "learning_rate": 3.483491885842194e-05, | |
| "loss": 0.5654, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.119245283018868, | |
| "grad_norm": 0.4626449984604195, | |
| "learning_rate": 3.480693900391718e-05, | |
| "loss": 0.6544, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.120754716981132, | |
| "grad_norm": 0.5183643140468202, | |
| "learning_rate": 3.4778959149412426e-05, | |
| "loss": 0.6651, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.1222641509433962, | |
| "grad_norm": 0.49226691831400854, | |
| "learning_rate": 3.475097929490767e-05, | |
| "loss": 0.6862, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.1237735849056603, | |
| "grad_norm": 4.415706986920795, | |
| "learning_rate": 3.472299944040291e-05, | |
| "loss": 0.6689, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.1252830188679246, | |
| "grad_norm": 0.7573980083060947, | |
| "learning_rate": 3.4695019585898156e-05, | |
| "loss": 0.6373, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.1267924528301887, | |
| "grad_norm": 0.5906345275939948, | |
| "learning_rate": 3.46670397313934e-05, | |
| "loss": 0.6309, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.1283018867924528, | |
| "grad_norm": 0.3998318687851271, | |
| "learning_rate": 3.463905987688864e-05, | |
| "loss": 0.6802, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.129811320754717, | |
| "grad_norm": 0.5467978795675057, | |
| "learning_rate": 3.4611080022383886e-05, | |
| "loss": 0.562, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.131320754716981, | |
| "grad_norm": 0.5191137071980696, | |
| "learning_rate": 3.458310016787913e-05, | |
| "loss": 0.6156, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.1328301886792453, | |
| "grad_norm": 0.44633904601748514, | |
| "learning_rate": 3.455512031337437e-05, | |
| "loss": 0.5838, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.1343396226415094, | |
| "grad_norm": 0.5065992896082492, | |
| "learning_rate": 3.4527140458869616e-05, | |
| "loss": 0.6843, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.1358490566037736, | |
| "grad_norm": 0.4264224918247215, | |
| "learning_rate": 3.449916060436486e-05, | |
| "loss": 0.6091, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.1373584905660377, | |
| "grad_norm": 0.3992177248827404, | |
| "learning_rate": 3.44711807498601e-05, | |
| "loss": 0.6, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.1388679245283018, | |
| "grad_norm": 0.48903828473507216, | |
| "learning_rate": 3.4443200895355346e-05, | |
| "loss": 0.6459, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.140377358490566, | |
| "grad_norm": 0.4105745437343364, | |
| "learning_rate": 3.441522104085059e-05, | |
| "loss": 0.6526, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.1418867924528302, | |
| "grad_norm": 0.49384071168597404, | |
| "learning_rate": 3.438724118634583e-05, | |
| "loss": 0.685, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.1433962264150943, | |
| "grad_norm": 0.5413740841399795, | |
| "learning_rate": 3.4359261331841076e-05, | |
| "loss": 0.6377, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.1449056603773584, | |
| "grad_norm": 0.480269969475746, | |
| "learning_rate": 3.433128147733632e-05, | |
| "loss": 0.6463, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.1464150943396225, | |
| "grad_norm": 0.5646301785912243, | |
| "learning_rate": 3.430330162283156e-05, | |
| "loss": 0.6265, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.1479245283018868, | |
| "grad_norm": 0.8673433809554542, | |
| "learning_rate": 3.4275321768326806e-05, | |
| "loss": 0.642, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.149433962264151, | |
| "grad_norm": 0.4112877387329245, | |
| "learning_rate": 3.424734191382205e-05, | |
| "loss": 0.6082, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.150943396226415, | |
| "grad_norm": 0.6320032562966805, | |
| "learning_rate": 3.421936205931729e-05, | |
| "loss": 0.6219, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.1524528301886792, | |
| "grad_norm": 0.46715710458859117, | |
| "learning_rate": 3.4191382204812536e-05, | |
| "loss": 0.6477, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.1539622641509433, | |
| "grad_norm": 0.5192541163104857, | |
| "learning_rate": 3.416340235030778e-05, | |
| "loss": 0.6686, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.1554716981132076, | |
| "grad_norm": 0.5115037747798377, | |
| "learning_rate": 3.413542249580302e-05, | |
| "loss": 0.5957, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.1569811320754717, | |
| "grad_norm": 0.4631548895084437, | |
| "learning_rate": 3.4107442641298266e-05, | |
| "loss": 0.6691, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.1584905660377358, | |
| "grad_norm": 0.4315295867507691, | |
| "learning_rate": 3.407946278679351e-05, | |
| "loss": 0.5968, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 0.5489635360542999, | |
| "learning_rate": 3.405148293228875e-05, | |
| "loss": 0.6286, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.161509433962264, | |
| "grad_norm": 0.5260776713612061, | |
| "learning_rate": 3.4023503077783996e-05, | |
| "loss": 0.6876, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.1630188679245284, | |
| "grad_norm": 0.41102121571815237, | |
| "learning_rate": 3.399552322327924e-05, | |
| "loss": 0.6875, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.1645283018867925, | |
| "grad_norm": 0.5051554044354539, | |
| "learning_rate": 3.396754336877448e-05, | |
| "loss": 0.6126, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.1660377358490566, | |
| "grad_norm": 0.4562465913464345, | |
| "learning_rate": 3.3939563514269726e-05, | |
| "loss": 0.6137, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.1675471698113207, | |
| "grad_norm": 0.4743549865343238, | |
| "learning_rate": 3.391158365976497e-05, | |
| "loss": 0.6315, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.169056603773585, | |
| "grad_norm": 0.5247109156016383, | |
| "learning_rate": 3.388360380526022e-05, | |
| "loss": 0.6428, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.170566037735849, | |
| "grad_norm": 3.559423434494465, | |
| "learning_rate": 3.3855623950755457e-05, | |
| "loss": 0.6067, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.1720754716981132, | |
| "grad_norm": 0.5989095184362712, | |
| "learning_rate": 3.38276440962507e-05, | |
| "loss": 0.6457, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.1735849056603773, | |
| "grad_norm": 0.5188354410044358, | |
| "learning_rate": 3.379966424174594e-05, | |
| "loss": 0.5998, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.1750943396226414, | |
| "grad_norm": 0.4214267781907578, | |
| "learning_rate": 3.377168438724119e-05, | |
| "loss": 0.6579, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.1766037735849058, | |
| "grad_norm": 0.45186691120990086, | |
| "learning_rate": 3.374370453273643e-05, | |
| "loss": 0.6321, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.1781132075471699, | |
| "grad_norm": 0.5363268074933428, | |
| "learning_rate": 3.371572467823167e-05, | |
| "loss": 0.6145, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.179622641509434, | |
| "grad_norm": 0.49420994902734927, | |
| "learning_rate": 3.368774482372692e-05, | |
| "loss": 0.6015, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.181132075471698, | |
| "grad_norm": 0.3799463287008628, | |
| "learning_rate": 3.365976496922216e-05, | |
| "loss": 0.7114, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.1826415094339622, | |
| "grad_norm": 0.4873082496659108, | |
| "learning_rate": 3.3631785114717403e-05, | |
| "loss": 0.6194, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.1841509433962265, | |
| "grad_norm": 0.4207411818460765, | |
| "learning_rate": 3.3603805260212654e-05, | |
| "loss": 0.6103, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.1856603773584906, | |
| "grad_norm": 0.5223740890535683, | |
| "learning_rate": 3.35758254057079e-05, | |
| "loss": 0.6299, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.1871698113207547, | |
| "grad_norm": 0.3569381738413025, | |
| "learning_rate": 3.3547845551203134e-05, | |
| "loss": 0.6105, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.1886792452830188, | |
| "grad_norm": 4.0178075776210775, | |
| "learning_rate": 3.351986569669838e-05, | |
| "loss": 0.6551, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.190188679245283, | |
| "grad_norm": 0.9315621690551965, | |
| "learning_rate": 3.349188584219362e-05, | |
| "loss": 0.6726, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.1916981132075473, | |
| "grad_norm": 0.4663325979493853, | |
| "learning_rate": 3.3463905987688864e-05, | |
| "loss": 0.6479, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.1932075471698114, | |
| "grad_norm": 0.8155128870027623, | |
| "learning_rate": 3.343592613318411e-05, | |
| "loss": 0.6321, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.1947169811320755, | |
| "grad_norm": 0.5354713553205344, | |
| "learning_rate": 3.340794627867935e-05, | |
| "loss": 0.6267, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.1962264150943396, | |
| "grad_norm": 0.8995514690519233, | |
| "learning_rate": 3.3379966424174594e-05, | |
| "loss": 0.6668, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.1977358490566037, | |
| "grad_norm": 0.42325453026549564, | |
| "learning_rate": 3.3351986569669844e-05, | |
| "loss": 0.6892, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.199245283018868, | |
| "grad_norm": 0.9019398748449015, | |
| "learning_rate": 3.332400671516509e-05, | |
| "loss": 0.6142, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.2007547169811321, | |
| "grad_norm": 0.42650778637925774, | |
| "learning_rate": 3.329602686066033e-05, | |
| "loss": 0.9311, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.2022641509433962, | |
| "grad_norm": 5.832651725718385, | |
| "learning_rate": 3.326804700615557e-05, | |
| "loss": 0.5994, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.2037735849056603, | |
| "grad_norm": 1.4055840202932217, | |
| "learning_rate": 3.324006715165081e-05, | |
| "loss": 0.6326, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.2052830188679244, | |
| "grad_norm": 0.5916688857583293, | |
| "learning_rate": 3.3212087297146054e-05, | |
| "loss": 0.6772, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.2067924528301888, | |
| "grad_norm": 0.9928690203020595, | |
| "learning_rate": 3.31841074426413e-05, | |
| "loss": 0.595, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.2083018867924529, | |
| "grad_norm": 0.9349255830289889, | |
| "learning_rate": 3.315612758813654e-05, | |
| "loss": 0.7102, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.209811320754717, | |
| "grad_norm": 0.7686689517909183, | |
| "learning_rate": 3.3128147733631784e-05, | |
| "loss": 0.6264, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.211320754716981, | |
| "grad_norm": 0.9656464748523269, | |
| "learning_rate": 3.3100167879127034e-05, | |
| "loss": 0.6611, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.2128301886792452, | |
| "grad_norm": 0.5809016244321841, | |
| "learning_rate": 3.307218802462228e-05, | |
| "loss": 0.6674, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.2143396226415095, | |
| "grad_norm": 0.8488641757151281, | |
| "learning_rate": 3.304420817011752e-05, | |
| "loss": 0.6724, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.2158490566037736, | |
| "grad_norm": 0.5115352162894421, | |
| "learning_rate": 3.3016228315612764e-05, | |
| "loss": 0.6244, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.2173584905660377, | |
| "grad_norm": 0.6746402205487975, | |
| "learning_rate": 3.2988248461108e-05, | |
| "loss": 0.6216, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.2188679245283018, | |
| "grad_norm": 0.5947361094685111, | |
| "learning_rate": 3.2960268606603244e-05, | |
| "loss": 0.6407, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.220377358490566, | |
| "grad_norm": 0.6118537661273408, | |
| "learning_rate": 3.293228875209849e-05, | |
| "loss": 0.6642, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.2218867924528303, | |
| "grad_norm": 0.5853643244928053, | |
| "learning_rate": 3.290430889759373e-05, | |
| "loss": 0.6274, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.2233962264150944, | |
| "grad_norm": 0.4408870524517529, | |
| "learning_rate": 3.2876329043088974e-05, | |
| "loss": 0.6762, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.2249056603773585, | |
| "grad_norm": 0.5786375937277597, | |
| "learning_rate": 3.284834918858422e-05, | |
| "loss": 0.618, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.2264150943396226, | |
| "grad_norm": 0.37448594144435, | |
| "learning_rate": 3.282036933407947e-05, | |
| "loss": 0.6316, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.2279245283018867, | |
| "grad_norm": 0.4860905984518343, | |
| "learning_rate": 3.279238947957471e-05, | |
| "loss": 0.6299, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.229433962264151, | |
| "grad_norm": 0.5849544770920747, | |
| "learning_rate": 3.2764409625069954e-05, | |
| "loss": 0.5729, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.2309433962264151, | |
| "grad_norm": 0.4008195084296223, | |
| "learning_rate": 3.27364297705652e-05, | |
| "loss": 0.6682, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.2324528301886792, | |
| "grad_norm": 0.5272118883156421, | |
| "learning_rate": 3.270844991606044e-05, | |
| "loss": 0.6049, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.2339622641509433, | |
| "grad_norm": 0.550682484167377, | |
| "learning_rate": 3.268047006155568e-05, | |
| "loss": 0.5918, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.2354716981132075, | |
| "grad_norm": 0.45899566874631015, | |
| "learning_rate": 3.265249020705092e-05, | |
| "loss": 0.6293, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.2369811320754718, | |
| "grad_norm": 0.41053358276258073, | |
| "learning_rate": 3.2624510352546165e-05, | |
| "loss": 0.5974, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.2384905660377359, | |
| "grad_norm": 0.5673047046076692, | |
| "learning_rate": 3.259653049804141e-05, | |
| "loss": 0.6689, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 0.42001733257774354, | |
| "learning_rate": 3.256855064353666e-05, | |
| "loss": 0.5781, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.241509433962264, | |
| "grad_norm": 0.5274791837888488, | |
| "learning_rate": 3.25405707890319e-05, | |
| "loss": 0.6556, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.2430188679245284, | |
| "grad_norm": 0.40140251232729945, | |
| "learning_rate": 3.2512590934527145e-05, | |
| "loss": 0.5618, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.2445283018867925, | |
| "grad_norm": 0.38988606187868724, | |
| "learning_rate": 3.248461108002239e-05, | |
| "loss": 0.6696, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.2460377358490566, | |
| "grad_norm": 0.6304384074605135, | |
| "learning_rate": 3.245663122551763e-05, | |
| "loss": 0.6769, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.2475471698113207, | |
| "grad_norm": 0.3979395391659101, | |
| "learning_rate": 3.2428651371012875e-05, | |
| "loss": 0.6174, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.2490566037735849, | |
| "grad_norm": 0.5329831109280307, | |
| "learning_rate": 3.240067151650811e-05, | |
| "loss": 0.6181, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.2505660377358492, | |
| "grad_norm": 0.4319961747815377, | |
| "learning_rate": 3.2372691662003355e-05, | |
| "loss": 0.6993, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.2520754716981133, | |
| "grad_norm": 0.4920198115969687, | |
| "learning_rate": 3.23447118074986e-05, | |
| "loss": 0.6775, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.2535849056603774, | |
| "grad_norm": 0.5327815925026391, | |
| "learning_rate": 3.231673195299385e-05, | |
| "loss": 0.6518, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.2550943396226415, | |
| "grad_norm": 0.4389886233997492, | |
| "learning_rate": 3.228875209848909e-05, | |
| "loss": 0.6365, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.2566037735849056, | |
| "grad_norm": 0.5047415384999543, | |
| "learning_rate": 3.2260772243984335e-05, | |
| "loss": 0.7062, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.25811320754717, | |
| "grad_norm": 0.4396225841431117, | |
| "learning_rate": 3.223279238947958e-05, | |
| "loss": 0.61, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.259622641509434, | |
| "grad_norm": 0.4564667026923815, | |
| "learning_rate": 3.220481253497482e-05, | |
| "loss": 0.6335, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.2611320754716981, | |
| "grad_norm": 0.39053767513919807, | |
| "learning_rate": 3.2176832680470065e-05, | |
| "loss": 0.6055, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.2626415094339623, | |
| "grad_norm": 0.3783353602690344, | |
| "learning_rate": 3.214885282596531e-05, | |
| "loss": 0.7125, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.2641509433962264, | |
| "grad_norm": 0.3553163046641145, | |
| "learning_rate": 3.2120872971460545e-05, | |
| "loss": 0.6143, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.2656603773584907, | |
| "grad_norm": 0.35917670960749537, | |
| "learning_rate": 3.209289311695579e-05, | |
| "loss": 0.5769, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.2671698113207548, | |
| "grad_norm": 0.36467830677027235, | |
| "learning_rate": 3.206491326245103e-05, | |
| "loss": 0.5699, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.268679245283019, | |
| "grad_norm": 0.3773609262039195, | |
| "learning_rate": 3.203693340794628e-05, | |
| "loss": 0.6271, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.270188679245283, | |
| "grad_norm": 0.3795609120240298, | |
| "learning_rate": 3.2008953553441525e-05, | |
| "loss": 0.6725, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.271698113207547, | |
| "grad_norm": 0.37050676683076367, | |
| "learning_rate": 3.198097369893677e-05, | |
| "loss": 0.5933, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.2732075471698114, | |
| "grad_norm": 0.45887192265355964, | |
| "learning_rate": 3.195299384443201e-05, | |
| "loss": 0.6373, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.2747169811320755, | |
| "grad_norm": 1.4950560043816827, | |
| "learning_rate": 3.1925013989927255e-05, | |
| "loss": 0.9561, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.2762264150943397, | |
| "grad_norm": 0.39850378462049746, | |
| "learning_rate": 3.18970341354225e-05, | |
| "loss": 0.6525, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.2777358490566038, | |
| "grad_norm": 0.4135301305547025, | |
| "learning_rate": 3.186905428091774e-05, | |
| "loss": 0.6623, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.2792452830188679, | |
| "grad_norm": 0.42557572265505506, | |
| "learning_rate": 3.1841074426412985e-05, | |
| "loss": 0.6964, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.2807547169811322, | |
| "grad_norm": 0.39760056122405496, | |
| "learning_rate": 3.181309457190822e-05, | |
| "loss": 0.6106, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.2822641509433963, | |
| "grad_norm": 0.40798606836706397, | |
| "learning_rate": 3.178511471740347e-05, | |
| "loss": 0.5632, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.2837735849056604, | |
| "grad_norm": 0.3462421636576025, | |
| "learning_rate": 3.1757134862898716e-05, | |
| "loss": 0.5894, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.2852830188679245, | |
| "grad_norm": 0.3722881735233624, | |
| "learning_rate": 3.172915500839396e-05, | |
| "loss": 0.6412, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.2867924528301886, | |
| "grad_norm": 0.37960556416454605, | |
| "learning_rate": 3.17011751538892e-05, | |
| "loss": 0.6001, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.288301886792453, | |
| "grad_norm": 0.4276713553028228, | |
| "learning_rate": 3.1673195299384446e-05, | |
| "loss": 0.6603, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.289811320754717, | |
| "grad_norm": 0.7095036362422655, | |
| "learning_rate": 3.164521544487969e-05, | |
| "loss": 0.6137, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.2913207547169812, | |
| "grad_norm": 0.4807408403609567, | |
| "learning_rate": 3.161723559037493e-05, | |
| "loss": 0.6588, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.2928301886792453, | |
| "grad_norm": 0.4358649697165436, | |
| "learning_rate": 3.1589255735870176e-05, | |
| "loss": 0.5926, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.2943396226415094, | |
| "grad_norm": 0.41013182956481836, | |
| "learning_rate": 3.156127588136542e-05, | |
| "loss": 0.6665, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.2958490566037737, | |
| "grad_norm": 0.41515671451727343, | |
| "learning_rate": 3.153329602686066e-05, | |
| "loss": 0.7209, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.2973584905660378, | |
| "grad_norm": 0.3942619832225865, | |
| "learning_rate": 3.1505316172355906e-05, | |
| "loss": 0.6667, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.298867924528302, | |
| "grad_norm": 0.4021542796788365, | |
| "learning_rate": 3.147733631785115e-05, | |
| "loss": 0.6465, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.300377358490566, | |
| "grad_norm": 0.37633875180949317, | |
| "learning_rate": 3.144935646334639e-05, | |
| "loss": 0.6343, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.3018867924528301, | |
| "grad_norm": 0.7184734573887464, | |
| "learning_rate": 3.1421376608841636e-05, | |
| "loss": 0.6515, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.3033962264150944, | |
| "grad_norm": 0.36958219984109786, | |
| "learning_rate": 3.139339675433688e-05, | |
| "loss": 0.6474, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.3049056603773586, | |
| "grad_norm": 0.41188446823897223, | |
| "learning_rate": 3.136541689983212e-05, | |
| "loss": 0.6052, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.3064150943396227, | |
| "grad_norm": 0.42281806811295863, | |
| "learning_rate": 3.1337437045327366e-05, | |
| "loss": 0.6652, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.3079245283018868, | |
| "grad_norm": 0.33175743854835926, | |
| "learning_rate": 3.130945719082261e-05, | |
| "loss": 0.5977, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.3094339622641509, | |
| "grad_norm": 0.37734563826001954, | |
| "learning_rate": 3.128147733631785e-05, | |
| "loss": 0.5966, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.3109433962264152, | |
| "grad_norm": 0.4182778269406502, | |
| "learning_rate": 3.1253497481813096e-05, | |
| "loss": 0.6583, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.3124528301886793, | |
| "grad_norm": 0.36140009515229593, | |
| "learning_rate": 3.122551762730834e-05, | |
| "loss": 0.6068, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.3139622641509434, | |
| "grad_norm": 0.37977481362373233, | |
| "learning_rate": 3.119753777280358e-05, | |
| "loss": 0.654, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.3154716981132075, | |
| "grad_norm": 0.3416259213239559, | |
| "learning_rate": 3.1169557918298826e-05, | |
| "loss": 0.6387, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.3169811320754716, | |
| "grad_norm": 0.3564805566672587, | |
| "learning_rate": 3.114157806379407e-05, | |
| "loss": 0.6369, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.318490566037736, | |
| "grad_norm": 0.3803523160244477, | |
| "learning_rate": 3.111359820928931e-05, | |
| "loss": 0.7093, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 0.39166045934086346, | |
| "learning_rate": 3.1085618354784556e-05, | |
| "loss": 0.6496, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.3215094339622642, | |
| "grad_norm": 0.4353767626286411, | |
| "learning_rate": 3.10576385002798e-05, | |
| "loss": 0.6266, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.3230188679245283, | |
| "grad_norm": 0.3593032604836283, | |
| "learning_rate": 3.102965864577504e-05, | |
| "loss": 0.6782, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.3245283018867924, | |
| "grad_norm": 0.4168829319681385, | |
| "learning_rate": 3.1001678791270286e-05, | |
| "loss": 0.6282, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.3260377358490567, | |
| "grad_norm": 0.37757583489653257, | |
| "learning_rate": 3.097369893676553e-05, | |
| "loss": 0.6298, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.3275471698113208, | |
| "grad_norm": 0.3501040816376274, | |
| "learning_rate": 3.094571908226077e-05, | |
| "loss": 0.6461, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.329056603773585, | |
| "grad_norm": 0.4721309371418326, | |
| "learning_rate": 3.0917739227756016e-05, | |
| "loss": 0.6283, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.330566037735849, | |
| "grad_norm": 0.40370139757838286, | |
| "learning_rate": 3.088975937325126e-05, | |
| "loss": 0.6533, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.3320754716981131, | |
| "grad_norm": 0.4191434348168408, | |
| "learning_rate": 3.08617795187465e-05, | |
| "loss": 0.5634, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.3335849056603775, | |
| "grad_norm": 0.47048915913727934, | |
| "learning_rate": 3.0833799664241747e-05, | |
| "loss": 0.6498, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.3350943396226416, | |
| "grad_norm": 0.5386238703131466, | |
| "learning_rate": 3.080581980973699e-05, | |
| "loss": 0.6598, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.3366037735849057, | |
| "grad_norm": 0.4046153720620124, | |
| "learning_rate": 3.077783995523223e-05, | |
| "loss": 0.6303, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.3381132075471698, | |
| "grad_norm": 0.5092275981590267, | |
| "learning_rate": 3.0749860100727477e-05, | |
| "loss": 0.5994, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.3396226415094339, | |
| "grad_norm": 0.4201270256233553, | |
| "learning_rate": 3.072188024622272e-05, | |
| "loss": 0.6035, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.3411320754716982, | |
| "grad_norm": 0.44363066935554674, | |
| "learning_rate": 3.069390039171797e-05, | |
| "loss": 0.6149, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.3426415094339623, | |
| "grad_norm": 0.45931419806615414, | |
| "learning_rate": 3.066592053721321e-05, | |
| "loss": 0.6853, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.3441509433962264, | |
| "grad_norm": 0.3981069141574624, | |
| "learning_rate": 3.063794068270845e-05, | |
| "loss": 0.6445, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.3456603773584905, | |
| "grad_norm": 0.4033897537692928, | |
| "learning_rate": 3.0609960828203693e-05, | |
| "loss": 0.6657, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.3471698113207546, | |
| "grad_norm": 0.3858298350639289, | |
| "learning_rate": 3.058198097369894e-05, | |
| "loss": 0.6255, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.348679245283019, | |
| "grad_norm": 0.47123781695477013, | |
| "learning_rate": 3.055400111919418e-05, | |
| "loss": 0.6509, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.350188679245283, | |
| "grad_norm": 0.4029191937376374, | |
| "learning_rate": 3.0526021264689424e-05, | |
| "loss": 0.6651, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.3516981132075472, | |
| "grad_norm": 0.388896779384108, | |
| "learning_rate": 3.049804141018467e-05, | |
| "loss": 0.6395, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.3532075471698113, | |
| "grad_norm": 0.3768031336195982, | |
| "learning_rate": 3.0470061555679914e-05, | |
| "loss": 0.655, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.3547169811320754, | |
| "grad_norm": 0.3984983631125007, | |
| "learning_rate": 3.0442081701175157e-05, | |
| "loss": 0.6149, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.3562264150943397, | |
| "grad_norm": 0.4066202145440965, | |
| "learning_rate": 3.04141018466704e-05, | |
| "loss": 0.6486, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.3577358490566038, | |
| "grad_norm": 0.44059085553416244, | |
| "learning_rate": 3.0386121992165644e-05, | |
| "loss": 0.676, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.359245283018868, | |
| "grad_norm": 0.41928112497983105, | |
| "learning_rate": 3.0358142137660884e-05, | |
| "loss": 0.6166, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.360754716981132, | |
| "grad_norm": 0.4484411056202494, | |
| "learning_rate": 3.0330162283156127e-05, | |
| "loss": 0.5916, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.3622641509433961, | |
| "grad_norm": 0.39462759464635006, | |
| "learning_rate": 3.030218242865137e-05, | |
| "loss": 0.6252, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 1.3637735849056605, | |
| "grad_norm": 0.3064423590338561, | |
| "learning_rate": 3.0274202574146614e-05, | |
| "loss": 0.7015, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.3652830188679246, | |
| "grad_norm": 0.4187213380492406, | |
| "learning_rate": 3.024622271964186e-05, | |
| "loss": 0.6168, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.3667924528301887, | |
| "grad_norm": 0.40261471242008323, | |
| "learning_rate": 3.0218242865137104e-05, | |
| "loss": 0.7055, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.3683018867924528, | |
| "grad_norm": 0.3481493833030105, | |
| "learning_rate": 3.0190263010632347e-05, | |
| "loss": 0.6208, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 1.369811320754717, | |
| "grad_norm": 0.35049655031964855, | |
| "learning_rate": 3.016228315612759e-05, | |
| "loss": 0.5781, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.3713207547169812, | |
| "grad_norm": 0.42278059968973397, | |
| "learning_rate": 3.0134303301622834e-05, | |
| "loss": 0.5904, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 1.3728301886792453, | |
| "grad_norm": 0.3445994928179493, | |
| "learning_rate": 3.0106323447118077e-05, | |
| "loss": 0.6288, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.3743396226415094, | |
| "grad_norm": 0.38967053457550976, | |
| "learning_rate": 3.0078343592613317e-05, | |
| "loss": 0.6392, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 1.3758490566037735, | |
| "grad_norm": 0.39707064143261184, | |
| "learning_rate": 3.005036373810856e-05, | |
| "loss": 0.6279, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.3773584905660377, | |
| "grad_norm": 0.35444427397617145, | |
| "learning_rate": 3.0022383883603804e-05, | |
| "loss": 0.5976, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 1.378867924528302, | |
| "grad_norm": 0.34690507904913026, | |
| "learning_rate": 2.999440402909905e-05, | |
| "loss": 0.634, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.380377358490566, | |
| "grad_norm": 0.4626788655800128, | |
| "learning_rate": 2.9966424174594294e-05, | |
| "loss": 0.6583, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.3818867924528302, | |
| "grad_norm": 0.3671421219021182, | |
| "learning_rate": 2.9938444320089538e-05, | |
| "loss": 0.6514, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.3833962264150943, | |
| "grad_norm": 0.37784633929271194, | |
| "learning_rate": 2.991046446558478e-05, | |
| "loss": 0.601, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 1.3849056603773584, | |
| "grad_norm": 0.4089029572046124, | |
| "learning_rate": 2.9882484611080024e-05, | |
| "loss": 0.6588, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.3864150943396227, | |
| "grad_norm": 0.3455888490666691, | |
| "learning_rate": 2.9854504756575268e-05, | |
| "loss": 0.6022, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 1.3879245283018868, | |
| "grad_norm": 0.3654741927522717, | |
| "learning_rate": 2.9826524902070514e-05, | |
| "loss": 0.6638, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.389433962264151, | |
| "grad_norm": 0.3502350361285747, | |
| "learning_rate": 2.9798545047565758e-05, | |
| "loss": 0.6645, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 1.390943396226415, | |
| "grad_norm": 0.37331731488300535, | |
| "learning_rate": 2.9770565193060994e-05, | |
| "loss": 0.6074, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.3924528301886792, | |
| "grad_norm": 0.3185944385198641, | |
| "learning_rate": 2.9742585338556238e-05, | |
| "loss": 0.5764, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 1.3939622641509435, | |
| "grad_norm": 0.37558027024168333, | |
| "learning_rate": 2.9714605484051484e-05, | |
| "loss": 0.6027, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.3954716981132076, | |
| "grad_norm": 0.3494928337929872, | |
| "learning_rate": 2.9686625629546728e-05, | |
| "loss": 0.6464, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.3969811320754717, | |
| "grad_norm": 0.41717609426235447, | |
| "learning_rate": 2.965864577504197e-05, | |
| "loss": 0.6136, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.3984905660377358, | |
| "grad_norm": 0.3690138657188448, | |
| "learning_rate": 2.9630665920537215e-05, | |
| "loss": 0.6544, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.36363869250987335, | |
| "learning_rate": 2.9602686066032458e-05, | |
| "loss": 0.6072, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.4015094339622642, | |
| "grad_norm": 0.327574112494754, | |
| "learning_rate": 2.9574706211527705e-05, | |
| "loss": 0.6352, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 1.4030188679245283, | |
| "grad_norm": 0.35727876020347676, | |
| "learning_rate": 2.9546726357022948e-05, | |
| "loss": 0.5951, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.4045283018867925, | |
| "grad_norm": 0.31716305744497264, | |
| "learning_rate": 2.951874650251819e-05, | |
| "loss": 0.6039, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 1.4060377358490566, | |
| "grad_norm": 0.33093980503518344, | |
| "learning_rate": 2.9490766648013428e-05, | |
| "loss": 0.6163, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.4075471698113207, | |
| "grad_norm": 0.3861900118247219, | |
| "learning_rate": 2.9462786793508675e-05, | |
| "loss": 0.6153, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 1.409056603773585, | |
| "grad_norm": 0.3277916345887553, | |
| "learning_rate": 2.9434806939003918e-05, | |
| "loss": 0.6284, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.410566037735849, | |
| "grad_norm": 0.3780457505042723, | |
| "learning_rate": 2.940682708449916e-05, | |
| "loss": 0.6452, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.4120754716981132, | |
| "grad_norm": 0.3465233045348986, | |
| "learning_rate": 2.9378847229994405e-05, | |
| "loss": 0.7393, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.4135849056603773, | |
| "grad_norm": 2.5533756746713117, | |
| "learning_rate": 2.9350867375489648e-05, | |
| "loss": 0.5907, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 1.4150943396226414, | |
| "grad_norm": 0.4089686961924739, | |
| "learning_rate": 2.932288752098489e-05, | |
| "loss": 0.6067, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.4166037735849057, | |
| "grad_norm": 0.35586033724923993, | |
| "learning_rate": 2.9294907666480138e-05, | |
| "loss": 0.5488, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 1.4181132075471699, | |
| "grad_norm": 0.4380246394519367, | |
| "learning_rate": 2.926692781197538e-05, | |
| "loss": 0.6399, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.419622641509434, | |
| "grad_norm": 0.4249638666583559, | |
| "learning_rate": 2.9238947957470625e-05, | |
| "loss": 0.6233, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 1.421132075471698, | |
| "grad_norm": 0.4153579482995771, | |
| "learning_rate": 2.9210968102965865e-05, | |
| "loss": 0.6602, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.4226415094339622, | |
| "grad_norm": 0.4754922849198537, | |
| "learning_rate": 2.918298824846111e-05, | |
| "loss": 0.7016, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 1.4241509433962265, | |
| "grad_norm": 0.3236274287786117, | |
| "learning_rate": 2.915500839395635e-05, | |
| "loss": 0.5863, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.4256603773584906, | |
| "grad_norm": 0.41992181352774643, | |
| "learning_rate": 2.9127028539451595e-05, | |
| "loss": 0.6258, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.4271698113207547, | |
| "grad_norm": 0.8115843394380751, | |
| "learning_rate": 2.909904868494684e-05, | |
| "loss": 0.6465, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.4286792452830188, | |
| "grad_norm": 0.39731341593956065, | |
| "learning_rate": 2.9071068830442082e-05, | |
| "loss": 0.6443, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 1.430188679245283, | |
| "grad_norm": 0.36644813089178, | |
| "learning_rate": 2.904308897593733e-05, | |
| "loss": 0.5728, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.4316981132075473, | |
| "grad_norm": 0.438411822701408, | |
| "learning_rate": 2.9015109121432572e-05, | |
| "loss": 0.5963, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 1.4332075471698114, | |
| "grad_norm": 0.38295490840225677, | |
| "learning_rate": 2.8987129266927815e-05, | |
| "loss": 0.6152, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.4347169811320755, | |
| "grad_norm": 0.39618114403032584, | |
| "learning_rate": 2.895914941242306e-05, | |
| "loss": 0.6637, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 1.4362264150943396, | |
| "grad_norm": 0.3815428588638221, | |
| "learning_rate": 2.8931169557918302e-05, | |
| "loss": 0.5864, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.4377358490566037, | |
| "grad_norm": 0.3663074285581354, | |
| "learning_rate": 2.8903189703413542e-05, | |
| "loss": 0.6733, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 1.439245283018868, | |
| "grad_norm": 0.3668247689466776, | |
| "learning_rate": 2.8875209848908785e-05, | |
| "loss": 0.6375, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.440754716981132, | |
| "grad_norm": 0.3535138691082089, | |
| "learning_rate": 2.884722999440403e-05, | |
| "loss": 0.5939, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 1.4422641509433962, | |
| "grad_norm": 0.3634202864100825, | |
| "learning_rate": 2.8819250139899272e-05, | |
| "loss": 0.6292, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.4437735849056603, | |
| "grad_norm": 0.3572638644281544, | |
| "learning_rate": 2.879127028539452e-05, | |
| "loss": 0.6664, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 1.4452830188679244, | |
| "grad_norm": 0.3508425660348941, | |
| "learning_rate": 2.8763290430889762e-05, | |
| "loss": 0.6468, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.4467924528301888, | |
| "grad_norm": 0.3707956017303932, | |
| "learning_rate": 2.8735310576385005e-05, | |
| "loss": 0.6495, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 1.4483018867924529, | |
| "grad_norm": 0.3278213890755489, | |
| "learning_rate": 2.870733072188025e-05, | |
| "loss": 0.6217, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.449811320754717, | |
| "grad_norm": 0.40970965375073787, | |
| "learning_rate": 2.8679350867375492e-05, | |
| "loss": 0.6451, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 1.451320754716981, | |
| "grad_norm": 0.3688583638649611, | |
| "learning_rate": 2.8651371012870736e-05, | |
| "loss": 0.6251, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.4528301886792452, | |
| "grad_norm": 0.34179313589597693, | |
| "learning_rate": 2.8623391158365976e-05, | |
| "loss": 0.6409, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 1.4543396226415095, | |
| "grad_norm": 0.3448261436866798, | |
| "learning_rate": 2.859541130386122e-05, | |
| "loss": 0.5905, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.4558490566037736, | |
| "grad_norm": 0.33694291297212864, | |
| "learning_rate": 2.8567431449356462e-05, | |
| "loss": 0.5761, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 1.4573584905660377, | |
| "grad_norm": 0.3118187273291105, | |
| "learning_rate": 2.8539451594851706e-05, | |
| "loss": 0.6088, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.4588679245283018, | |
| "grad_norm": 0.32045778178994183, | |
| "learning_rate": 2.8511471740346952e-05, | |
| "loss": 0.6205, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 1.460377358490566, | |
| "grad_norm": 0.45338400028752146, | |
| "learning_rate": 2.8483491885842196e-05, | |
| "loss": 0.5963, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.4618867924528303, | |
| "grad_norm": 0.32689068860548115, | |
| "learning_rate": 2.845551203133744e-05, | |
| "loss": 0.5745, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 1.4633962264150944, | |
| "grad_norm": 0.38005063017031954, | |
| "learning_rate": 2.8427532176832682e-05, | |
| "loss": 0.6426, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.4649056603773585, | |
| "grad_norm": 0.3309282292367092, | |
| "learning_rate": 2.8399552322327926e-05, | |
| "loss": 0.669, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 1.4664150943396226, | |
| "grad_norm": 1.4917946740992876, | |
| "learning_rate": 2.8371572467823173e-05, | |
| "loss": 0.6343, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.4679245283018867, | |
| "grad_norm": 0.3233023167238395, | |
| "learning_rate": 2.8343592613318416e-05, | |
| "loss": 0.6077, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 1.469433962264151, | |
| "grad_norm": 0.3262290819657421, | |
| "learning_rate": 2.8315612758813653e-05, | |
| "loss": 0.6578, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.4709433962264151, | |
| "grad_norm": 0.3339350102619771, | |
| "learning_rate": 2.8287632904308896e-05, | |
| "loss": 0.621, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.4724528301886792, | |
| "grad_norm": 0.3192961373970712, | |
| "learning_rate": 2.8259653049804143e-05, | |
| "loss": 0.6236, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.4739622641509433, | |
| "grad_norm": 0.49845825196565774, | |
| "learning_rate": 2.8231673195299386e-05, | |
| "loss": 0.6254, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 1.4754716981132074, | |
| "grad_norm": 0.3485802760303236, | |
| "learning_rate": 2.820369334079463e-05, | |
| "loss": 0.6046, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.4769811320754718, | |
| "grad_norm": 0.3598346283166558, | |
| "learning_rate": 2.8175713486289873e-05, | |
| "loss": 0.6621, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 1.4784905660377359, | |
| "grad_norm": 0.7079216014041246, | |
| "learning_rate": 2.8147733631785116e-05, | |
| "loss": 0.6659, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 0.3996327762211729, | |
| "learning_rate": 2.811975377728036e-05, | |
| "loss": 0.5864, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 1.481509433962264, | |
| "grad_norm": 0.4468257832623057, | |
| "learning_rate": 2.8091773922775606e-05, | |
| "loss": 0.629, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.4830188679245282, | |
| "grad_norm": 0.35338065855143863, | |
| "learning_rate": 2.806379406827085e-05, | |
| "loss": 0.5689, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 1.4845283018867925, | |
| "grad_norm": 0.3447521513306188, | |
| "learning_rate": 2.8035814213766086e-05, | |
| "loss": 0.5519, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.4860377358490566, | |
| "grad_norm": 0.34657588388233884, | |
| "learning_rate": 2.8007834359261333e-05, | |
| "loss": 0.6423, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 1.4875471698113207, | |
| "grad_norm": 0.34128797853204484, | |
| "learning_rate": 2.7979854504756576e-05, | |
| "loss": 0.6009, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.4890566037735848, | |
| "grad_norm": 0.8214335030714001, | |
| "learning_rate": 2.795187465025182e-05, | |
| "loss": 0.5986, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 1.490566037735849, | |
| "grad_norm": 9.613076505885713, | |
| "learning_rate": 2.7923894795747063e-05, | |
| "loss": 0.8501, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.4920754716981133, | |
| "grad_norm": 0.5243283003121673, | |
| "learning_rate": 2.7895914941242306e-05, | |
| "loss": 0.6533, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 1.4935849056603774, | |
| "grad_norm": 0.4045389908997858, | |
| "learning_rate": 2.786793508673755e-05, | |
| "loss": 0.6253, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.4950943396226415, | |
| "grad_norm": 0.4281589533591875, | |
| "learning_rate": 2.7839955232232796e-05, | |
| "loss": 0.6287, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 1.4966037735849056, | |
| "grad_norm": 0.38192093777254366, | |
| "learning_rate": 2.781197537772804e-05, | |
| "loss": 0.6681, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.4981132075471697, | |
| "grad_norm": 0.47848727765965543, | |
| "learning_rate": 2.7783995523223283e-05, | |
| "loss": 0.6019, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 1.499622641509434, | |
| "grad_norm": 0.3985883342403703, | |
| "learning_rate": 2.775601566871852e-05, | |
| "loss": 0.6599, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.5011320754716981, | |
| "grad_norm": 0.3434562172845685, | |
| "learning_rate": 2.7728035814213767e-05, | |
| "loss": 0.6081, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 1.5026415094339622, | |
| "grad_norm": 0.3980403992472545, | |
| "learning_rate": 2.770005595970901e-05, | |
| "loss": 0.6172, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.5041509433962266, | |
| "grad_norm": 0.3763385571813578, | |
| "learning_rate": 2.7672076105204253e-05, | |
| "loss": 0.6792, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 1.5056603773584905, | |
| "grad_norm": 0.40146854330428877, | |
| "learning_rate": 2.7644096250699497e-05, | |
| "loss": 0.6224, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.5071698113207548, | |
| "grad_norm": 0.33301446846068716, | |
| "learning_rate": 2.761611639619474e-05, | |
| "loss": 0.6887, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 1.5086792452830189, | |
| "grad_norm": 0.38371280593670326, | |
| "learning_rate": 2.7588136541689987e-05, | |
| "loss": 0.6132, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.510188679245283, | |
| "grad_norm": 0.37104373493288634, | |
| "learning_rate": 2.756015668718523e-05, | |
| "loss": 0.6067, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 1.5116981132075473, | |
| "grad_norm": 0.47232500165135083, | |
| "learning_rate": 2.7532176832680473e-05, | |
| "loss": 0.6102, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.5132075471698112, | |
| "grad_norm": 0.33043113034271476, | |
| "learning_rate": 2.7504196978175717e-05, | |
| "loss": 0.6384, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 1.5147169811320755, | |
| "grad_norm": 0.3448322098677679, | |
| "learning_rate": 2.747621712367096e-05, | |
| "loss": 0.6998, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.5162264150943396, | |
| "grad_norm": 0.40306936202383525, | |
| "learning_rate": 2.74482372691662e-05, | |
| "loss": 0.6676, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 1.5177358490566037, | |
| "grad_norm": 0.32212746145926247, | |
| "learning_rate": 2.7420257414661444e-05, | |
| "loss": 0.7054, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.519245283018868, | |
| "grad_norm": 0.3630795836083071, | |
| "learning_rate": 2.7392277560156687e-05, | |
| "loss": 0.6069, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 1.520754716981132, | |
| "grad_norm": 0.35892977901686324, | |
| "learning_rate": 2.736429770565193e-05, | |
| "loss": 0.6303, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.5222641509433963, | |
| "grad_norm": 0.34033834989241846, | |
| "learning_rate": 2.7336317851147174e-05, | |
| "loss": 0.6073, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 1.5237735849056604, | |
| "grad_norm": 0.3606987139401938, | |
| "learning_rate": 2.730833799664242e-05, | |
| "loss": 0.6533, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.5252830188679245, | |
| "grad_norm": 0.35197301530590785, | |
| "learning_rate": 2.7280358142137664e-05, | |
| "loss": 0.6774, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 1.5267924528301888, | |
| "grad_norm": 0.3409953188778291, | |
| "learning_rate": 2.7252378287632907e-05, | |
| "loss": 0.6111, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.5283018867924527, | |
| "grad_norm": 0.35710307374963385, | |
| "learning_rate": 2.722439843312815e-05, | |
| "loss": 0.5887, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 1.529811320754717, | |
| "grad_norm": 0.39361169119324246, | |
| "learning_rate": 2.7196418578623394e-05, | |
| "loss": 0.6824, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.5313207547169811, | |
| "grad_norm": 0.42089198638548375, | |
| "learning_rate": 2.7168438724118634e-05, | |
| "loss": 0.6084, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.5328301886792453, | |
| "grad_norm": 0.4002037847566263, | |
| "learning_rate": 2.7140458869613877e-05, | |
| "loss": 0.6164, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.5343396226415096, | |
| "grad_norm": 0.4279739458874451, | |
| "learning_rate": 2.711247901510912e-05, | |
| "loss": 0.602, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 1.5358490566037735, | |
| "grad_norm": 0.3741065695840471, | |
| "learning_rate": 2.7084499160604364e-05, | |
| "loss": 0.5819, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.5373584905660378, | |
| "grad_norm": 0.4279718365040865, | |
| "learning_rate": 2.705651930609961e-05, | |
| "loss": 0.6296, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 1.538867924528302, | |
| "grad_norm": 0.448203149422696, | |
| "learning_rate": 2.7028539451594854e-05, | |
| "loss": 0.5985, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.540377358490566, | |
| "grad_norm": 0.39251867440111504, | |
| "learning_rate": 2.7000559597090097e-05, | |
| "loss": 0.6152, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 1.5418867924528303, | |
| "grad_norm": 0.5318193655500686, | |
| "learning_rate": 2.697257974258534e-05, | |
| "loss": 0.6533, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.5433962264150942, | |
| "grad_norm": 0.36282379983998825, | |
| "learning_rate": 2.6944599888080584e-05, | |
| "loss": 0.6422, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 1.5449056603773585, | |
| "grad_norm": 0.4679464718781621, | |
| "learning_rate": 2.6916620033575827e-05, | |
| "loss": 0.6914, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.5464150943396227, | |
| "grad_norm": 0.390523648842573, | |
| "learning_rate": 2.6888640179071067e-05, | |
| "loss": 0.6562, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.5479245283018868, | |
| "grad_norm": 0.3704983255661872, | |
| "learning_rate": 2.686066032456631e-05, | |
| "loss": 0.6257, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.549433962264151, | |
| "grad_norm": 0.41154051577412576, | |
| "learning_rate": 2.6832680470061554e-05, | |
| "loss": 0.6028, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 1.550943396226415, | |
| "grad_norm": 0.33250241430063915, | |
| "learning_rate": 2.68047006155568e-05, | |
| "loss": 0.6491, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.5524528301886793, | |
| "grad_norm": 0.4012914420070519, | |
| "learning_rate": 2.6776720761052044e-05, | |
| "loss": 0.6053, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 1.5539622641509434, | |
| "grad_norm": 0.47506656031425826, | |
| "learning_rate": 2.6748740906547288e-05, | |
| "loss": 0.6044, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.5554716981132075, | |
| "grad_norm": 0.3707491589866621, | |
| "learning_rate": 2.672076105204253e-05, | |
| "loss": 0.6703, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 1.5569811320754718, | |
| "grad_norm": 0.34186133320917916, | |
| "learning_rate": 2.6692781197537774e-05, | |
| "loss": 0.6596, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.5584905660377357, | |
| "grad_norm": 0.3491021187362257, | |
| "learning_rate": 2.6664801343033018e-05, | |
| "loss": 0.5844, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 0.7155210370076859, | |
| "learning_rate": 2.6636821488528264e-05, | |
| "loss": 0.6393, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.5615094339622642, | |
| "grad_norm": 0.37905853694030756, | |
| "learning_rate": 2.6608841634023508e-05, | |
| "loss": 0.5804, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 1.5630188679245283, | |
| "grad_norm": 0.39249010794322703, | |
| "learning_rate": 2.6580861779518744e-05, | |
| "loss": 0.6622, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.5645283018867926, | |
| "grad_norm": 0.3177813349439448, | |
| "learning_rate": 2.6552881925013988e-05, | |
| "loss": 0.6893, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 1.5660377358490565, | |
| "grad_norm": 0.3578675379157616, | |
| "learning_rate": 2.6524902070509235e-05, | |
| "loss": 0.6743, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.5675471698113208, | |
| "grad_norm": 0.38432047726232615, | |
| "learning_rate": 2.6496922216004478e-05, | |
| "loss": 0.5848, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 1.569056603773585, | |
| "grad_norm": 0.38664999715982123, | |
| "learning_rate": 2.646894236149972e-05, | |
| "loss": 0.6501, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.570566037735849, | |
| "grad_norm": 0.3030876699883648, | |
| "learning_rate": 2.6440962506994965e-05, | |
| "loss": 0.6048, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 1.5720754716981133, | |
| "grad_norm": 0.3282480223581648, | |
| "learning_rate": 2.6412982652490208e-05, | |
| "loss": 0.5784, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.5735849056603772, | |
| "grad_norm": 0.34287712921209795, | |
| "learning_rate": 2.638500279798545e-05, | |
| "loss": 0.5587, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 1.5750943396226416, | |
| "grad_norm": 0.4249512336319463, | |
| "learning_rate": 2.6357022943480698e-05, | |
| "loss": 0.6872, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.5766037735849057, | |
| "grad_norm": 0.36483078091450444, | |
| "learning_rate": 2.632904308897594e-05, | |
| "loss": 0.6272, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 1.5781132075471698, | |
| "grad_norm": 0.33642660623413323, | |
| "learning_rate": 2.6301063234471178e-05, | |
| "loss": 0.6152, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.579622641509434, | |
| "grad_norm": 0.3795901995942625, | |
| "learning_rate": 2.6273083379966425e-05, | |
| "loss": 0.6457, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 1.581132075471698, | |
| "grad_norm": 0.4999474857747963, | |
| "learning_rate": 2.6245103525461668e-05, | |
| "loss": 0.6492, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.5826415094339623, | |
| "grad_norm": 0.31486204696092474, | |
| "learning_rate": 2.621712367095691e-05, | |
| "loss": 0.6266, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 1.5841509433962264, | |
| "grad_norm": 0.3742304773869301, | |
| "learning_rate": 2.6189143816452155e-05, | |
| "loss": 0.6116, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.5856603773584905, | |
| "grad_norm": 0.41017165219333346, | |
| "learning_rate": 2.6161163961947398e-05, | |
| "loss": 0.6209, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 1.5871698113207549, | |
| "grad_norm": 0.32510313559158954, | |
| "learning_rate": 2.613318410744264e-05, | |
| "loss": 0.5938, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.5886792452830187, | |
| "grad_norm": 0.34975228019540705, | |
| "learning_rate": 2.610520425293789e-05, | |
| "loss": 0.6052, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 1.590188679245283, | |
| "grad_norm": 0.3593828761751314, | |
| "learning_rate": 2.6077224398433132e-05, | |
| "loss": 0.5994, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.5916981132075472, | |
| "grad_norm": 0.3379735940847511, | |
| "learning_rate": 2.6049244543928375e-05, | |
| "loss": 0.6682, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 1.5932075471698113, | |
| "grad_norm": 0.3688041258843097, | |
| "learning_rate": 2.602126468942362e-05, | |
| "loss": 0.7183, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.5947169811320756, | |
| "grad_norm": 0.4216449339373789, | |
| "learning_rate": 2.599328483491886e-05, | |
| "loss": 0.7043, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 1.5962264150943395, | |
| "grad_norm": 2.267511774259029, | |
| "learning_rate": 2.5965304980414102e-05, | |
| "loss": 0.683, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.5977358490566038, | |
| "grad_norm": 0.3895472450306408, | |
| "learning_rate": 2.5937325125909345e-05, | |
| "loss": 0.6855, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 1.599245283018868, | |
| "grad_norm": 0.41188597998599974, | |
| "learning_rate": 2.590934527140459e-05, | |
| "loss": 0.6083, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.600754716981132, | |
| "grad_norm": 0.3059560204490495, | |
| "learning_rate": 2.5881365416899832e-05, | |
| "loss": 0.5571, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 1.6022641509433964, | |
| "grad_norm": 0.34622860668786465, | |
| "learning_rate": 2.585338556239508e-05, | |
| "loss": 0.5771, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.6037735849056602, | |
| "grad_norm": 0.34479433863653974, | |
| "learning_rate": 2.5825405707890322e-05, | |
| "loss": 0.6081, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 1.6052830188679246, | |
| "grad_norm": 0.36272238390017086, | |
| "learning_rate": 2.5797425853385565e-05, | |
| "loss": 0.6959, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.6067924528301887, | |
| "grad_norm": 0.4094790107793089, | |
| "learning_rate": 2.576944599888081e-05, | |
| "loss": 0.6309, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 1.6083018867924528, | |
| "grad_norm": 0.3788756955199532, | |
| "learning_rate": 2.5741466144376052e-05, | |
| "loss": 0.5994, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.6098113207547171, | |
| "grad_norm": 0.363138476827497, | |
| "learning_rate": 2.5713486289871292e-05, | |
| "loss": 0.6657, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 1.611320754716981, | |
| "grad_norm": 0.35600650897268843, | |
| "learning_rate": 2.5685506435366535e-05, | |
| "loss": 0.6238, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.6128301886792453, | |
| "grad_norm": 0.31529869654986054, | |
| "learning_rate": 2.565752658086178e-05, | |
| "loss": 0.6157, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 1.6143396226415094, | |
| "grad_norm": 0.41343618537042476, | |
| "learning_rate": 2.5629546726357022e-05, | |
| "loss": 0.6847, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.6158490566037735, | |
| "grad_norm": 0.36948590263079245, | |
| "learning_rate": 2.560156687185227e-05, | |
| "loss": 0.6373, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 1.6173584905660379, | |
| "grad_norm": 0.3405631397524817, | |
| "learning_rate": 2.5573587017347512e-05, | |
| "loss": 0.6454, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.6188679245283017, | |
| "grad_norm": 0.3876899509134273, | |
| "learning_rate": 2.5545607162842756e-05, | |
| "loss": 0.6897, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 1.620377358490566, | |
| "grad_norm": 0.36648572255209627, | |
| "learning_rate": 2.5517627308338e-05, | |
| "loss": 0.6366, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.6218867924528302, | |
| "grad_norm": 0.39363149190822344, | |
| "learning_rate": 2.5489647453833242e-05, | |
| "loss": 0.6312, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.6233962264150943, | |
| "grad_norm": 0.393083017888767, | |
| "learning_rate": 2.5461667599328486e-05, | |
| "loss": 0.621, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.6249056603773586, | |
| "grad_norm": 0.42789295095166247, | |
| "learning_rate": 2.5433687744823726e-05, | |
| "loss": 0.6328, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 1.6264150943396225, | |
| "grad_norm": 0.34143614581342263, | |
| "learning_rate": 2.540570789031897e-05, | |
| "loss": 0.5833, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.6279245283018868, | |
| "grad_norm": 0.48993095092983346, | |
| "learning_rate": 2.5377728035814212e-05, | |
| "loss": 0.6499, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 1.629433962264151, | |
| "grad_norm": 0.38482007687414094, | |
| "learning_rate": 2.5349748181309456e-05, | |
| "loss": 0.624, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.630943396226415, | |
| "grad_norm": 0.3620296499170073, | |
| "learning_rate": 2.5321768326804703e-05, | |
| "loss": 0.6196, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 1.6324528301886794, | |
| "grad_norm": 0.4413925417638725, | |
| "learning_rate": 2.5293788472299946e-05, | |
| "loss": 0.6562, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.6339622641509433, | |
| "grad_norm": 0.31728101683770593, | |
| "learning_rate": 2.526580861779519e-05, | |
| "loss": 0.5931, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 1.6354716981132076, | |
| "grad_norm": 0.3242208210115639, | |
| "learning_rate": 2.5237828763290433e-05, | |
| "loss": 0.6428, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.6369811320754717, | |
| "grad_norm": 0.34313826598204494, | |
| "learning_rate": 2.5209848908785676e-05, | |
| "loss": 0.5892, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 1.6384905660377358, | |
| "grad_norm": 0.27873747077218364, | |
| "learning_rate": 2.518186905428092e-05, | |
| "loss": 0.6435, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.6400000000000001, | |
| "grad_norm": 0.34391194671727837, | |
| "learning_rate": 2.5153889199776166e-05, | |
| "loss": 0.6208, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 1.641509433962264, | |
| "grad_norm": 0.3202174999860832, | |
| "learning_rate": 2.5125909345271403e-05, | |
| "loss": 0.5982, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.6430188679245283, | |
| "grad_norm": 0.353421412519148, | |
| "learning_rate": 2.5097929490766646e-05, | |
| "loss": 0.6558, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 1.6445283018867924, | |
| "grad_norm": 0.33590448835443065, | |
| "learning_rate": 2.5069949636261893e-05, | |
| "loss": 0.5849, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.6460377358490565, | |
| "grad_norm": 0.3049009352630597, | |
| "learning_rate": 2.5041969781757136e-05, | |
| "loss": 0.6268, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 1.6475471698113209, | |
| "grad_norm": 0.32814946592178634, | |
| "learning_rate": 2.501398992725238e-05, | |
| "loss": 0.571, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.6490566037735848, | |
| "grad_norm": 0.32851218281151046, | |
| "learning_rate": 2.4986010072747623e-05, | |
| "loss": 0.5919, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 1.650566037735849, | |
| "grad_norm": 0.29866330007475594, | |
| "learning_rate": 2.4958030218242866e-05, | |
| "loss": 0.609, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.6520754716981132, | |
| "grad_norm": 0.2903575786430782, | |
| "learning_rate": 2.493005036373811e-05, | |
| "loss": 0.633, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 1.6535849056603773, | |
| "grad_norm": 0.34536143070473535, | |
| "learning_rate": 2.4902070509233353e-05, | |
| "loss": 0.6005, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.6550943396226416, | |
| "grad_norm": 0.3026599205345073, | |
| "learning_rate": 2.4874090654728596e-05, | |
| "loss": 0.6478, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 1.6566037735849055, | |
| "grad_norm": 0.306175601002009, | |
| "learning_rate": 2.484611080022384e-05, | |
| "loss": 0.6195, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.6581132075471698, | |
| "grad_norm": 0.3710617396392141, | |
| "learning_rate": 2.4818130945719083e-05, | |
| "loss": 0.6228, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 1.659622641509434, | |
| "grad_norm": 0.3377466953849946, | |
| "learning_rate": 2.4790151091214326e-05, | |
| "loss": 0.6511, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.661132075471698, | |
| "grad_norm": 0.31549413830814466, | |
| "learning_rate": 2.476217123670957e-05, | |
| "loss": 0.5633, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 1.6626415094339624, | |
| "grad_norm": 0.357754789091578, | |
| "learning_rate": 2.4734191382204813e-05, | |
| "loss": 0.6337, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.6641509433962263, | |
| "grad_norm": 0.3306548434162944, | |
| "learning_rate": 2.4706211527700057e-05, | |
| "loss": 0.5843, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 1.6656603773584906, | |
| "grad_norm": 6.054734215990575, | |
| "learning_rate": 2.46782316731953e-05, | |
| "loss": 0.661, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.6671698113207547, | |
| "grad_norm": 0.4115492900895262, | |
| "learning_rate": 2.4650251818690547e-05, | |
| "loss": 0.6434, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 1.6686792452830188, | |
| "grad_norm": 0.822362998347998, | |
| "learning_rate": 2.4622271964185787e-05, | |
| "loss": 0.6846, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.6701886792452831, | |
| "grad_norm": 0.4448253668967885, | |
| "learning_rate": 2.459429210968103e-05, | |
| "loss": 0.5628, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 1.671698113207547, | |
| "grad_norm": 0.3770953555568863, | |
| "learning_rate": 2.4566312255176273e-05, | |
| "loss": 0.6269, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.6732075471698113, | |
| "grad_norm": 0.5299343272830751, | |
| "learning_rate": 2.4538332400671517e-05, | |
| "loss": 0.6237, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 1.6747169811320755, | |
| "grad_norm": 0.3702282688776482, | |
| "learning_rate": 2.4510352546166763e-05, | |
| "loss": 0.6586, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.6762264150943396, | |
| "grad_norm": 0.39024405698123815, | |
| "learning_rate": 2.4482372691662003e-05, | |
| "loss": 0.6429, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 1.677735849056604, | |
| "grad_norm": 0.4537391331752708, | |
| "learning_rate": 2.4454392837157247e-05, | |
| "loss": 0.639, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.6792452830188678, | |
| "grad_norm": 0.3149708264349129, | |
| "learning_rate": 2.442641298265249e-05, | |
| "loss": 0.5799, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 1.680754716981132, | |
| "grad_norm": 0.332009587191511, | |
| "learning_rate": 2.4398433128147733e-05, | |
| "loss": 0.5921, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.6822641509433962, | |
| "grad_norm": 0.4987660993726321, | |
| "learning_rate": 2.437045327364298e-05, | |
| "loss": 0.5617, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 1.6837735849056603, | |
| "grad_norm": 0.38823515270339287, | |
| "learning_rate": 2.434247341913822e-05, | |
| "loss": 0.6943, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.6852830188679246, | |
| "grad_norm": 0.41948680248507986, | |
| "learning_rate": 2.4314493564633464e-05, | |
| "loss": 0.6714, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 1.6867924528301885, | |
| "grad_norm": 0.41536448849439084, | |
| "learning_rate": 2.4286513710128707e-05, | |
| "loss": 0.6236, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.6883018867924529, | |
| "grad_norm": 0.38203854076076366, | |
| "learning_rate": 2.4258533855623954e-05, | |
| "loss": 0.6081, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 1.689811320754717, | |
| "grad_norm": 0.3167935649962668, | |
| "learning_rate": 2.4230554001119197e-05, | |
| "loss": 0.6062, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.691320754716981, | |
| "grad_norm": 0.39343182435379037, | |
| "learning_rate": 2.420257414661444e-05, | |
| "loss": 0.6464, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 1.6928301886792454, | |
| "grad_norm": 0.38741191325073565, | |
| "learning_rate": 2.417459429210968e-05, | |
| "loss": 0.5858, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.6943396226415093, | |
| "grad_norm": 0.3110251540082128, | |
| "learning_rate": 2.4146614437604924e-05, | |
| "loss": 0.6742, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 1.6958490566037736, | |
| "grad_norm": 0.4205404270555864, | |
| "learning_rate": 2.411863458310017e-05, | |
| "loss": 0.6817, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.6973584905660377, | |
| "grad_norm": 0.3370144854328815, | |
| "learning_rate": 2.4090654728595414e-05, | |
| "loss": 0.6306, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.6988679245283018, | |
| "grad_norm": 0.31428186192021673, | |
| "learning_rate": 2.4062674874090657e-05, | |
| "loss": 0.6701, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.7003773584905661, | |
| "grad_norm": 0.35546961013819645, | |
| "learning_rate": 2.4034695019585897e-05, | |
| "loss": 0.5598, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 1.70188679245283, | |
| "grad_norm": 0.30842915313942293, | |
| "learning_rate": 2.400671516508114e-05, | |
| "loss": 0.6555, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.7033962264150944, | |
| "grad_norm": 0.3503526140590797, | |
| "learning_rate": 2.3978735310576387e-05, | |
| "loss": 0.6268, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 1.7049056603773585, | |
| "grad_norm": 0.3525131623141097, | |
| "learning_rate": 2.395075545607163e-05, | |
| "loss": 0.651, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.7064150943396226, | |
| "grad_norm": 0.362462685268756, | |
| "learning_rate": 2.3922775601566874e-05, | |
| "loss": 0.654, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 1.707924528301887, | |
| "grad_norm": 0.35270111632126117, | |
| "learning_rate": 2.3894795747062114e-05, | |
| "loss": 0.6304, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.7094339622641508, | |
| "grad_norm": 0.35642789633544897, | |
| "learning_rate": 2.386681589255736e-05, | |
| "loss": 0.569, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 1.7109433962264151, | |
| "grad_norm": 0.3548672747435073, | |
| "learning_rate": 2.3838836038052604e-05, | |
| "loss": 0.6078, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.7124528301886792, | |
| "grad_norm": 0.41883631687319944, | |
| "learning_rate": 2.3810856183547848e-05, | |
| "loss": 0.6137, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 1.7139622641509433, | |
| "grad_norm": 0.37255769585734255, | |
| "learning_rate": 2.378287632904309e-05, | |
| "loss": 0.6341, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.7154716981132077, | |
| "grad_norm": 0.343650289950691, | |
| "learning_rate": 2.375489647453833e-05, | |
| "loss": 0.6816, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 1.7169811320754715, | |
| "grad_norm": 0.3135936860965841, | |
| "learning_rate": 2.3726916620033578e-05, | |
| "loss": 0.661, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.7184905660377359, | |
| "grad_norm": 0.3354145369379278, | |
| "learning_rate": 2.369893676552882e-05, | |
| "loss": 0.6131, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 0.37591403645106586, | |
| "learning_rate": 2.3670956911024064e-05, | |
| "loss": 0.7752, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.721509433962264, | |
| "grad_norm": 3.276557779806758, | |
| "learning_rate": 2.3642977056519308e-05, | |
| "loss": 0.6548, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 1.7230188679245284, | |
| "grad_norm": 0.40078418503283786, | |
| "learning_rate": 2.361499720201455e-05, | |
| "loss": 0.6305, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.7245283018867923, | |
| "grad_norm": 0.3490281170396962, | |
| "learning_rate": 2.3587017347509794e-05, | |
| "loss": 0.6477, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 1.7260377358490566, | |
| "grad_norm": 0.3130044965355929, | |
| "learning_rate": 2.3559037493005038e-05, | |
| "loss": 0.6318, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.7275471698113207, | |
| "grad_norm": 0.31341978043560415, | |
| "learning_rate": 2.353105763850028e-05, | |
| "loss": 0.5936, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 1.7290566037735848, | |
| "grad_norm": 0.4007373637031972, | |
| "learning_rate": 2.3503077783995524e-05, | |
| "loss": 0.6289, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.7305660377358492, | |
| "grad_norm": 0.3145194497545859, | |
| "learning_rate": 2.3475097929490768e-05, | |
| "loss": 0.5794, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 1.732075471698113, | |
| "grad_norm": 0.41907955688569665, | |
| "learning_rate": 2.344711807498601e-05, | |
| "loss": 0.6204, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.7335849056603774, | |
| "grad_norm": 0.3368506672709832, | |
| "learning_rate": 2.3419138220481255e-05, | |
| "loss": 0.6453, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 1.7350943396226415, | |
| "grad_norm": 0.3578725045779005, | |
| "learning_rate": 2.3391158365976498e-05, | |
| "loss": 0.6131, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.7366037735849056, | |
| "grad_norm": 0.387646182019032, | |
| "learning_rate": 2.336317851147174e-05, | |
| "loss": 0.6591, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 1.73811320754717, | |
| "grad_norm": 0.370813360320474, | |
| "learning_rate": 2.3335198656966985e-05, | |
| "loss": 0.6132, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.7396226415094338, | |
| "grad_norm": 0.35325542219207184, | |
| "learning_rate": 2.3307218802462228e-05, | |
| "loss": 0.6152, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 1.7411320754716981, | |
| "grad_norm": 0.34775995134307486, | |
| "learning_rate": 2.327923894795747e-05, | |
| "loss": 0.696, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.7426415094339622, | |
| "grad_norm": 0.431893921598768, | |
| "learning_rate": 2.3251259093452715e-05, | |
| "loss": 0.5584, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 1.7441509433962263, | |
| "grad_norm": 0.4818685295387096, | |
| "learning_rate": 2.3223279238947958e-05, | |
| "loss": 0.648, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.7456603773584907, | |
| "grad_norm": 0.3148571492339726, | |
| "learning_rate": 2.31952993844432e-05, | |
| "loss": 0.6053, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 1.7471698113207546, | |
| "grad_norm": 0.49474292238819984, | |
| "learning_rate": 2.3167319529938445e-05, | |
| "loss": 0.6756, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.7486792452830189, | |
| "grad_norm": 0.42970659132808886, | |
| "learning_rate": 2.3139339675433688e-05, | |
| "loss": 0.6378, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 1.750188679245283, | |
| "grad_norm": 0.3467646221311692, | |
| "learning_rate": 2.311135982092893e-05, | |
| "loss": 0.6767, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.751698113207547, | |
| "grad_norm": 0.49298106645190726, | |
| "learning_rate": 2.3083379966424175e-05, | |
| "loss": 0.6482, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 1.7532075471698114, | |
| "grad_norm": 0.38860132343453263, | |
| "learning_rate": 2.305540011191942e-05, | |
| "loss": 0.6828, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.7547169811320755, | |
| "grad_norm": 0.36904627662475137, | |
| "learning_rate": 2.302742025741466e-05, | |
| "loss": 0.5228, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 1.7562264150943396, | |
| "grad_norm": 0.3699438829613781, | |
| "learning_rate": 2.2999440402909905e-05, | |
| "loss": 0.7172, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.7577358490566037, | |
| "grad_norm": 0.46842274714832544, | |
| "learning_rate": 2.297146054840515e-05, | |
| "loss": 0.6039, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 1.7592452830188678, | |
| "grad_norm": 0.33529915395896437, | |
| "learning_rate": 2.2943480693900392e-05, | |
| "loss": 0.6698, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.7607547169811322, | |
| "grad_norm": 1.0369116423976208, | |
| "learning_rate": 2.291550083939564e-05, | |
| "loss": 0.632, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 1.7622641509433963, | |
| "grad_norm": 0.40116881234876345, | |
| "learning_rate": 2.288752098489088e-05, | |
| "loss": 0.5547, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.7637735849056604, | |
| "grad_norm": 0.32276429823331587, | |
| "learning_rate": 2.2859541130386122e-05, | |
| "loss": 0.6588, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 1.7652830188679245, | |
| "grad_norm": 0.43292042576567075, | |
| "learning_rate": 2.2831561275881365e-05, | |
| "loss": 0.6191, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.7667924528301886, | |
| "grad_norm": 0.3272559617175695, | |
| "learning_rate": 2.280358142137661e-05, | |
| "loss": 0.6573, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 1.768301886792453, | |
| "grad_norm": 0.3315275623212996, | |
| "learning_rate": 2.2775601566871855e-05, | |
| "loss": 0.5584, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.769811320754717, | |
| "grad_norm": 0.3839260631685633, | |
| "learning_rate": 2.27476217123671e-05, | |
| "loss": 0.6137, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 1.7713207547169811, | |
| "grad_norm": 0.29910749329288777, | |
| "learning_rate": 2.271964185786234e-05, | |
| "loss": 0.593, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.7728301886792452, | |
| "grad_norm": 0.3966476953266179, | |
| "learning_rate": 2.2691662003357582e-05, | |
| "loss": 0.5881, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 1.7743396226415094, | |
| "grad_norm": 0.3834221903344162, | |
| "learning_rate": 2.266368214885283e-05, | |
| "loss": 0.652, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.7758490566037737, | |
| "grad_norm": 0.3086183820074408, | |
| "learning_rate": 2.2635702294348072e-05, | |
| "loss": 0.6265, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 1.7773584905660378, | |
| "grad_norm": 0.3760096804332004, | |
| "learning_rate": 2.2607722439843315e-05, | |
| "loss": 0.5765, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.778867924528302, | |
| "grad_norm": 0.28869156048512906, | |
| "learning_rate": 2.2579742585338555e-05, | |
| "loss": 0.6032, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 1.780377358490566, | |
| "grad_norm": 0.3723280148627601, | |
| "learning_rate": 2.25517627308338e-05, | |
| "loss": 0.6364, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.78188679245283, | |
| "grad_norm": 0.3073245665980812, | |
| "learning_rate": 2.2523782876329046e-05, | |
| "loss": 0.5801, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 1.7833962264150944, | |
| "grad_norm": 0.30275210444327105, | |
| "learning_rate": 2.249580302182429e-05, | |
| "loss": 0.6272, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.7849056603773585, | |
| "grad_norm": 0.3482396962287078, | |
| "learning_rate": 2.2467823167319532e-05, | |
| "loss": 0.6292, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 1.7864150943396226, | |
| "grad_norm": 0.3036844211927974, | |
| "learning_rate": 2.2439843312814772e-05, | |
| "loss": 0.6301, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.7879245283018868, | |
| "grad_norm": 0.36892257197797645, | |
| "learning_rate": 2.2411863458310016e-05, | |
| "loss": 0.601, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 1.7894339622641509, | |
| "grad_norm": 0.30478398813561736, | |
| "learning_rate": 2.2383883603805262e-05, | |
| "loss": 0.5778, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.7909433962264152, | |
| "grad_norm": 0.41357451178016175, | |
| "learning_rate": 2.2355903749300506e-05, | |
| "loss": 0.6415, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 1.7924528301886793, | |
| "grad_norm": 0.40936383915996677, | |
| "learning_rate": 2.232792389479575e-05, | |
| "loss": 0.6318, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.7939622641509434, | |
| "grad_norm": 0.3305974308413721, | |
| "learning_rate": 2.229994404029099e-05, | |
| "loss": 0.6276, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 1.7954716981132075, | |
| "grad_norm": 0.44756079605901494, | |
| "learning_rate": 2.2271964185786236e-05, | |
| "loss": 0.5627, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.7969811320754716, | |
| "grad_norm": 0.32668591688032894, | |
| "learning_rate": 2.224398433128148e-05, | |
| "loss": 0.5553, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 1.798490566037736, | |
| "grad_norm": 0.4056153389625477, | |
| "learning_rate": 2.2216004476776723e-05, | |
| "loss": 0.6964, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.36423242716401244, | |
| "learning_rate": 2.2188024622271966e-05, | |
| "loss": 0.6385, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 1.8015094339622642, | |
| "grad_norm": 0.3714533894188977, | |
| "learning_rate": 2.2160044767767206e-05, | |
| "loss": 0.5732, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.8030188679245283, | |
| "grad_norm": 0.4092859262603853, | |
| "learning_rate": 2.2132064913262453e-05, | |
| "loss": 0.6767, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 1.8045283018867924, | |
| "grad_norm": 0.4137797947792045, | |
| "learning_rate": 2.2104085058757696e-05, | |
| "loss": 0.5931, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.8060377358490567, | |
| "grad_norm": 0.34744989842603025, | |
| "learning_rate": 2.207610520425294e-05, | |
| "loss": 0.6201, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 1.8075471698113208, | |
| "grad_norm": 0.3331997614705039, | |
| "learning_rate": 2.2048125349748183e-05, | |
| "loss": 0.6613, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.809056603773585, | |
| "grad_norm": 0.511957469574061, | |
| "learning_rate": 2.2020145495243423e-05, | |
| "loss": 0.5911, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 1.810566037735849, | |
| "grad_norm": 0.33360941658253085, | |
| "learning_rate": 2.199216564073867e-05, | |
| "loss": 0.5849, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.8120754716981131, | |
| "grad_norm": 0.3741495659072703, | |
| "learning_rate": 2.1964185786233913e-05, | |
| "loss": 0.609, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 1.8135849056603774, | |
| "grad_norm": 0.4742678582136245, | |
| "learning_rate": 2.1936205931729156e-05, | |
| "loss": 0.6193, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.8150943396226416, | |
| "grad_norm": 0.44917206739099247, | |
| "learning_rate": 2.19082260772244e-05, | |
| "loss": 0.692, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 1.8166037735849057, | |
| "grad_norm": 0.394176326005825, | |
| "learning_rate": 2.1880246222719643e-05, | |
| "loss": 0.6738, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.8181132075471698, | |
| "grad_norm": 0.43066185484760017, | |
| "learning_rate": 2.1852266368214886e-05, | |
| "loss": 0.642, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 1.8196226415094339, | |
| "grad_norm": 0.3418450949122748, | |
| "learning_rate": 2.182428651371013e-05, | |
| "loss": 0.6076, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.8211320754716982, | |
| "grad_norm": 0.3652282110342225, | |
| "learning_rate": 2.1796306659205373e-05, | |
| "loss": 0.6063, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 1.8226415094339623, | |
| "grad_norm": 0.30250644712093494, | |
| "learning_rate": 2.1768326804700616e-05, | |
| "loss": 0.5631, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.8241509433962264, | |
| "grad_norm": 0.3288737231812076, | |
| "learning_rate": 2.174034695019586e-05, | |
| "loss": 0.592, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 1.8256603773584905, | |
| "grad_norm": 0.33567232845354483, | |
| "learning_rate": 2.1712367095691103e-05, | |
| "loss": 0.6278, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.8271698113207546, | |
| "grad_norm": 0.3277915063716108, | |
| "learning_rate": 2.1684387241186346e-05, | |
| "loss": 0.6181, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 1.828679245283019, | |
| "grad_norm": 0.3265756953441019, | |
| "learning_rate": 2.165640738668159e-05, | |
| "loss": 0.6163, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.830188679245283, | |
| "grad_norm": 0.36101523587088497, | |
| "learning_rate": 2.1628427532176833e-05, | |
| "loss": 0.6528, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 1.8316981132075472, | |
| "grad_norm": 0.3559998541778918, | |
| "learning_rate": 2.1600447677672077e-05, | |
| "loss": 0.5998, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.8332075471698113, | |
| "grad_norm": 0.293141053328167, | |
| "learning_rate": 2.157246782316732e-05, | |
| "loss": 0.5898, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 1.8347169811320754, | |
| "grad_norm": 0.29122255758943166, | |
| "learning_rate": 2.1544487968662563e-05, | |
| "loss": 0.6354, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.8362264150943397, | |
| "grad_norm": 1.045084927109783, | |
| "learning_rate": 2.1516508114157807e-05, | |
| "loss": 0.5911, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 1.8377358490566038, | |
| "grad_norm": 0.31757940698589965, | |
| "learning_rate": 2.148852825965305e-05, | |
| "loss": 0.6768, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.839245283018868, | |
| "grad_norm": 0.32235934503323127, | |
| "learning_rate": 2.1460548405148297e-05, | |
| "loss": 0.6418, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 1.840754716981132, | |
| "grad_norm": 0.3428069649816916, | |
| "learning_rate": 2.1432568550643537e-05, | |
| "loss": 0.6437, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.8422641509433961, | |
| "grad_norm": 0.3059598449926168, | |
| "learning_rate": 2.140458869613878e-05, | |
| "loss": 0.6399, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 1.8437735849056605, | |
| "grad_norm": 0.29285459882489406, | |
| "learning_rate": 2.1376608841634023e-05, | |
| "loss": 0.6112, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.8452830188679246, | |
| "grad_norm": 1.0373182604259723, | |
| "learning_rate": 2.1348628987129267e-05, | |
| "loss": 0.6441, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 1.8467924528301887, | |
| "grad_norm": 0.2824305236439785, | |
| "learning_rate": 2.1320649132624514e-05, | |
| "loss": 0.6141, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.8483018867924528, | |
| "grad_norm": 0.40148675247914556, | |
| "learning_rate": 2.1292669278119754e-05, | |
| "loss": 0.6118, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 1.8498113207547169, | |
| "grad_norm": 0.2981810451237384, | |
| "learning_rate": 2.1264689423614997e-05, | |
| "loss": 0.6731, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.8513207547169812, | |
| "grad_norm": 2.584492019717478, | |
| "learning_rate": 2.123670956911024e-05, | |
| "loss": 0.616, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 1.8528301886792453, | |
| "grad_norm": 0.3591861226985177, | |
| "learning_rate": 2.1208729714605484e-05, | |
| "loss": 0.6236, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.8543396226415094, | |
| "grad_norm": 0.3095222847651958, | |
| "learning_rate": 2.118074986010073e-05, | |
| "loss": 0.6523, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 1.8558490566037738, | |
| "grad_norm": 0.3424147285170496, | |
| "learning_rate": 2.1152770005595974e-05, | |
| "loss": 0.6396, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.8573584905660376, | |
| "grad_norm": 0.314080508242106, | |
| "learning_rate": 2.1124790151091214e-05, | |
| "loss": 0.5968, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 1.858867924528302, | |
| "grad_norm": 0.29525316784574474, | |
| "learning_rate": 2.1096810296586457e-05, | |
| "loss": 0.6179, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.860377358490566, | |
| "grad_norm": 0.3262826338661236, | |
| "learning_rate": 2.1068830442081704e-05, | |
| "loss": 0.6361, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 1.8618867924528302, | |
| "grad_norm": 0.30483158690049045, | |
| "learning_rate": 2.1040850587576947e-05, | |
| "loss": 0.5972, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.8633962264150945, | |
| "grad_norm": 0.2998154483089429, | |
| "learning_rate": 2.101287073307219e-05, | |
| "loss": 0.6319, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 1.8649056603773584, | |
| "grad_norm": 0.3132929823968501, | |
| "learning_rate": 2.098489087856743e-05, | |
| "loss": 0.6272, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.8664150943396227, | |
| "grad_norm": 0.3644256019767862, | |
| "learning_rate": 2.0956911024062674e-05, | |
| "loss": 0.6275, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 1.8679245283018868, | |
| "grad_norm": 0.3619907633958512, | |
| "learning_rate": 2.092893116955792e-05, | |
| "loss": 0.6324, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.869433962264151, | |
| "grad_norm": 0.30644024116691077, | |
| "learning_rate": 2.0900951315053164e-05, | |
| "loss": 0.6487, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 1.8709433962264153, | |
| "grad_norm": 0.32101407266001664, | |
| "learning_rate": 2.0872971460548407e-05, | |
| "loss": 0.5942, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.8724528301886791, | |
| "grad_norm": 0.3574797990980977, | |
| "learning_rate": 2.0844991606043647e-05, | |
| "loss": 0.6058, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 1.8739622641509435, | |
| "grad_norm": 0.2731432397008649, | |
| "learning_rate": 2.081701175153889e-05, | |
| "loss": 0.5838, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.8754716981132076, | |
| "grad_norm": 0.3081971583676486, | |
| "learning_rate": 2.0789031897034137e-05, | |
| "loss": 0.5985, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 1.8769811320754717, | |
| "grad_norm": 0.4638755248661489, | |
| "learning_rate": 2.076105204252938e-05, | |
| "loss": 0.5448, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.878490566037736, | |
| "grad_norm": 0.36428569342156447, | |
| "learning_rate": 2.0733072188024624e-05, | |
| "loss": 0.6677, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 0.32794651307072614, | |
| "learning_rate": 2.0705092333519864e-05, | |
| "loss": 0.6155, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.8815094339622642, | |
| "grad_norm": 0.817788237288649, | |
| "learning_rate": 2.067711247901511e-05, | |
| "loss": 0.6651, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 1.8830188679245283, | |
| "grad_norm": 0.32900653987481365, | |
| "learning_rate": 2.0649132624510354e-05, | |
| "loss": 0.6534, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.8845283018867924, | |
| "grad_norm": 0.354259188328193, | |
| "learning_rate": 2.0621152770005598e-05, | |
| "loss": 0.5931, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 1.8860377358490568, | |
| "grad_norm": 0.3280985985620716, | |
| "learning_rate": 2.059317291550084e-05, | |
| "loss": 0.6111, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.8875471698113206, | |
| "grad_norm": 0.3875058797193474, | |
| "learning_rate": 2.056519306099608e-05, | |
| "loss": 0.5952, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 1.889056603773585, | |
| "grad_norm": 0.37671901941800245, | |
| "learning_rate": 2.0537213206491328e-05, | |
| "loss": 0.6522, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.890566037735849, | |
| "grad_norm": 0.4128531980352486, | |
| "learning_rate": 2.050923335198657e-05, | |
| "loss": 0.6864, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 1.8920754716981132, | |
| "grad_norm": 0.3164903698692737, | |
| "learning_rate": 2.0481253497481814e-05, | |
| "loss": 0.5493, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.8935849056603775, | |
| "grad_norm": 0.3960462076340831, | |
| "learning_rate": 2.0453273642977058e-05, | |
| "loss": 0.5976, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 1.8950943396226414, | |
| "grad_norm": 0.35480881312131624, | |
| "learning_rate": 2.04252937884723e-05, | |
| "loss": 0.5845, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.8966037735849057, | |
| "grad_norm": 0.34522057536500367, | |
| "learning_rate": 2.0397313933967545e-05, | |
| "loss": 0.6556, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 1.8981132075471698, | |
| "grad_norm": 0.337296443279804, | |
| "learning_rate": 2.0369334079462788e-05, | |
| "loss": 0.6814, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.899622641509434, | |
| "grad_norm": 0.2805168811056917, | |
| "learning_rate": 2.034135422495803e-05, | |
| "loss": 0.6272, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 1.9011320754716983, | |
| "grad_norm": 0.3449624394911455, | |
| "learning_rate": 2.0313374370453275e-05, | |
| "loss": 0.6474, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.9026415094339622, | |
| "grad_norm": 0.3280572211846723, | |
| "learning_rate": 2.0285394515948518e-05, | |
| "loss": 0.6347, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 1.9041509433962265, | |
| "grad_norm": 0.35714732927573933, | |
| "learning_rate": 2.025741466144376e-05, | |
| "loss": 0.5295, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.9056603773584906, | |
| "grad_norm": 0.35049276342588254, | |
| "learning_rate": 2.0229434806939005e-05, | |
| "loss": 0.586, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 1.9071698113207547, | |
| "grad_norm": 0.36879482492260923, | |
| "learning_rate": 2.0201454952434248e-05, | |
| "loss": 0.5682, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.908679245283019, | |
| "grad_norm": 0.300565774847212, | |
| "learning_rate": 2.017347509792949e-05, | |
| "loss": 0.6407, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 1.910188679245283, | |
| "grad_norm": 0.29997892836529605, | |
| "learning_rate": 2.0145495243424735e-05, | |
| "loss": 0.5913, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.9116981132075472, | |
| "grad_norm": 0.3222923618904655, | |
| "learning_rate": 2.0117515388919978e-05, | |
| "loss": 0.6426, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 1.9132075471698113, | |
| "grad_norm": 0.34653857778460406, | |
| "learning_rate": 2.008953553441522e-05, | |
| "loss": 0.629, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.9147169811320754, | |
| "grad_norm": 0.36204978834352647, | |
| "learning_rate": 2.0061555679910465e-05, | |
| "loss": 0.6626, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 1.9162264150943398, | |
| "grad_norm": 0.30703816406770273, | |
| "learning_rate": 2.0033575825405708e-05, | |
| "loss": 0.6269, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.9177358490566037, | |
| "grad_norm": 0.45526307662284454, | |
| "learning_rate": 2.000559597090095e-05, | |
| "loss": 0.6394, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 1.919245283018868, | |
| "grad_norm": 0.3428934663134498, | |
| "learning_rate": 1.9977616116396195e-05, | |
| "loss": 0.5814, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.920754716981132, | |
| "grad_norm": 0.3483198311610776, | |
| "learning_rate": 1.994963626189144e-05, | |
| "loss": 0.6114, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 1.9222641509433962, | |
| "grad_norm": 0.4806434783200171, | |
| "learning_rate": 1.9921656407386682e-05, | |
| "loss": 0.6376, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.9237735849056605, | |
| "grad_norm": 0.3746332153159746, | |
| "learning_rate": 1.9893676552881925e-05, | |
| "loss": 0.6103, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 1.9252830188679244, | |
| "grad_norm": 0.330995548312209, | |
| "learning_rate": 1.9865696698377172e-05, | |
| "loss": 0.5888, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.9267924528301887, | |
| "grad_norm": 0.33991119023837807, | |
| "learning_rate": 1.9837716843872412e-05, | |
| "loss": 0.6318, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 1.9283018867924528, | |
| "grad_norm": 0.7848019038955028, | |
| "learning_rate": 1.9809736989367655e-05, | |
| "loss": 0.6823, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.929811320754717, | |
| "grad_norm": 0.36581755414914946, | |
| "learning_rate": 1.97817571348629e-05, | |
| "loss": 0.631, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 1.9313207547169813, | |
| "grad_norm": 0.3516181876894466, | |
| "learning_rate": 1.9753777280358142e-05, | |
| "loss": 0.623, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.9328301886792452, | |
| "grad_norm": 0.34076006303037526, | |
| "learning_rate": 1.972579742585339e-05, | |
| "loss": 0.6362, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 1.9343396226415095, | |
| "grad_norm": 0.38953097373298856, | |
| "learning_rate": 1.9697817571348632e-05, | |
| "loss": 0.5828, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.9358490566037736, | |
| "grad_norm": 0.30915722554617087, | |
| "learning_rate": 1.9669837716843872e-05, | |
| "loss": 0.6231, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 1.9373584905660377, | |
| "grad_norm": 0.4125242098883407, | |
| "learning_rate": 1.9641857862339115e-05, | |
| "loss": 0.6137, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.938867924528302, | |
| "grad_norm": 0.306469481947522, | |
| "learning_rate": 1.961387800783436e-05, | |
| "loss": 0.6435, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 1.940377358490566, | |
| "grad_norm": 0.3902186395516134, | |
| "learning_rate": 1.9585898153329605e-05, | |
| "loss": 0.5786, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.9418867924528302, | |
| "grad_norm": 0.42033083017228917, | |
| "learning_rate": 1.955791829882485e-05, | |
| "loss": 0.6479, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 1.9433962264150944, | |
| "grad_norm": 0.376407214561354, | |
| "learning_rate": 1.952993844432009e-05, | |
| "loss": 0.5883, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.9449056603773585, | |
| "grad_norm": 0.3254588344744923, | |
| "learning_rate": 1.9501958589815332e-05, | |
| "loss": 0.6866, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 1.9464150943396228, | |
| "grad_norm": 0.5212042890070583, | |
| "learning_rate": 1.947397873531058e-05, | |
| "loss": 0.667, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.9479245283018867, | |
| "grad_norm": 0.48459611656974566, | |
| "learning_rate": 1.9445998880805822e-05, | |
| "loss": 0.5971, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 1.949433962264151, | |
| "grad_norm": 0.3517856389474132, | |
| "learning_rate": 1.9418019026301066e-05, | |
| "loss": 0.5731, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.950943396226415, | |
| "grad_norm": 0.37784085824797087, | |
| "learning_rate": 1.9390039171796306e-05, | |
| "loss": 0.6935, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 1.9524528301886792, | |
| "grad_norm": 0.4003678989850066, | |
| "learning_rate": 1.936205931729155e-05, | |
| "loss": 0.5787, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.9539622641509435, | |
| "grad_norm": 0.38383200350522656, | |
| "learning_rate": 1.9334079462786796e-05, | |
| "loss": 0.5821, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 1.9554716981132074, | |
| "grad_norm": 0.38567119387350274, | |
| "learning_rate": 1.930609960828204e-05, | |
| "loss": 0.6339, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.9569811320754718, | |
| "grad_norm": 0.3505634513365029, | |
| "learning_rate": 1.9278119753777282e-05, | |
| "loss": 0.6306, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 1.9584905660377359, | |
| "grad_norm": 0.36433000051048686, | |
| "learning_rate": 1.9250139899272522e-05, | |
| "loss": 0.6311, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 0.32889466881528095, | |
| "learning_rate": 1.9222160044767766e-05, | |
| "loss": 0.5904, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 1.9615094339622643, | |
| "grad_norm": 0.31496527251241535, | |
| "learning_rate": 1.9194180190263013e-05, | |
| "loss": 0.628, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.9630188679245282, | |
| "grad_norm": 3.183835781238465, | |
| "learning_rate": 1.9166200335758256e-05, | |
| "loss": 0.6688, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 1.9645283018867925, | |
| "grad_norm": 2.11988404901896, | |
| "learning_rate": 1.91382204812535e-05, | |
| "loss": 0.5751, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.9660377358490566, | |
| "grad_norm": 0.39102169211455845, | |
| "learning_rate": 1.911024062674874e-05, | |
| "loss": 0.6105, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 1.9675471698113207, | |
| "grad_norm": 0.3356373344184646, | |
| "learning_rate": 1.9082260772243986e-05, | |
| "loss": 0.6691, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.969056603773585, | |
| "grad_norm": 0.33936850902738813, | |
| "learning_rate": 1.905428091773923e-05, | |
| "loss": 0.5818, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 1.970566037735849, | |
| "grad_norm": 0.3125127190755324, | |
| "learning_rate": 1.9026301063234473e-05, | |
| "loss": 0.6903, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.9720754716981133, | |
| "grad_norm": 0.37220176903618524, | |
| "learning_rate": 1.8998321208729716e-05, | |
| "loss": 0.6372, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 1.9735849056603774, | |
| "grad_norm": 0.333323524072569, | |
| "learning_rate": 1.897034135422496e-05, | |
| "loss": 0.5685, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.9750943396226415, | |
| "grad_norm": 0.30576012281180814, | |
| "learning_rate": 1.8942361499720203e-05, | |
| "loss": 0.6038, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 1.9766037735849058, | |
| "grad_norm": 0.3945529751240445, | |
| "learning_rate": 1.8914381645215446e-05, | |
| "loss": 0.6457, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.9781132075471697, | |
| "grad_norm": 0.2774316641209328, | |
| "learning_rate": 1.888640179071069e-05, | |
| "loss": 0.6567, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 1.979622641509434, | |
| "grad_norm": 0.35117826532025936, | |
| "learning_rate": 1.8858421936205933e-05, | |
| "loss": 0.6308, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.9811320754716981, | |
| "grad_norm": 0.3419364217067856, | |
| "learning_rate": 1.8830442081701176e-05, | |
| "loss": 0.6162, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 1.9826415094339622, | |
| "grad_norm": 0.36693025313968536, | |
| "learning_rate": 1.880246222719642e-05, | |
| "loss": 0.634, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.9841509433962266, | |
| "grad_norm": 0.3183259269090567, | |
| "learning_rate": 1.8774482372691663e-05, | |
| "loss": 0.5759, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 1.9856603773584904, | |
| "grad_norm": 2.9550667184423682, | |
| "learning_rate": 1.8746502518186906e-05, | |
| "loss": 0.712, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.9871698113207548, | |
| "grad_norm": 0.36436478637302866, | |
| "learning_rate": 1.871852266368215e-05, | |
| "loss": 0.6046, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 1.9886792452830189, | |
| "grad_norm": 0.3513792552765972, | |
| "learning_rate": 1.8690542809177393e-05, | |
| "loss": 0.6677, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.990188679245283, | |
| "grad_norm": 0.32038155032867643, | |
| "learning_rate": 1.8662562954672636e-05, | |
| "loss": 0.6549, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 1.9916981132075473, | |
| "grad_norm": 0.31445105810605595, | |
| "learning_rate": 1.863458310016788e-05, | |
| "loss": 0.6003, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.9932075471698112, | |
| "grad_norm": 0.4328766003560714, | |
| "learning_rate": 1.8606603245663123e-05, | |
| "loss": 0.6476, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 1.9947169811320755, | |
| "grad_norm": 0.32805033665845823, | |
| "learning_rate": 1.8578623391158366e-05, | |
| "loss": 0.6121, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.9962264150943396, | |
| "grad_norm": 0.3016162458656436, | |
| "learning_rate": 1.855064353665361e-05, | |
| "loss": 0.5952, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 1.9977358490566037, | |
| "grad_norm": 0.3131440934220455, | |
| "learning_rate": 1.8522663682148853e-05, | |
| "loss": 0.5831, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.999245283018868, | |
| "grad_norm": 0.29401127063820814, | |
| "learning_rate": 1.8494683827644097e-05, | |
| "loss": 0.5822, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.49733230922246346, | |
| "learning_rate": 1.846670397313934e-05, | |
| "loss": 0.6198, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 2.0015094339622643, | |
| "grad_norm": 0.35646209321907746, | |
| "learning_rate": 1.8438724118634583e-05, | |
| "loss": 0.4999, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 2.003018867924528, | |
| "grad_norm": 0.3981125944090188, | |
| "learning_rate": 1.8410744264129827e-05, | |
| "loss": 0.4977, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 2.0045283018867925, | |
| "grad_norm": 0.3736032043806837, | |
| "learning_rate": 1.838276440962507e-05, | |
| "loss": 0.541, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 2.0060377358490564, | |
| "grad_norm": 0.30212541569145873, | |
| "learning_rate": 1.8354784555120313e-05, | |
| "loss": 0.4858, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.0075471698113208, | |
| "grad_norm": 0.3073556357585776, | |
| "learning_rate": 1.8326804700615557e-05, | |
| "loss": 0.5015, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 2.009056603773585, | |
| "grad_norm": 0.34113812336109905, | |
| "learning_rate": 1.82988248461108e-05, | |
| "loss": 0.5172, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 2.010566037735849, | |
| "grad_norm": 4.487795810249564, | |
| "learning_rate": 1.8270844991606047e-05, | |
| "loss": 0.5514, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 2.0120754716981133, | |
| "grad_norm": 0.42900541776859996, | |
| "learning_rate": 1.824286513710129e-05, | |
| "loss": 0.4934, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 2.013584905660377, | |
| "grad_norm": 3.065346721241103, | |
| "learning_rate": 1.821488528259653e-05, | |
| "loss": 0.5091, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 2.0150943396226415, | |
| "grad_norm": 0.40144469758266016, | |
| "learning_rate": 1.8186905428091774e-05, | |
| "loss": 0.5088, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 2.016603773584906, | |
| "grad_norm": 0.40845248070738877, | |
| "learning_rate": 1.8158925573587017e-05, | |
| "loss": 0.5574, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 2.0181132075471697, | |
| "grad_norm": 3.392575632261363, | |
| "learning_rate": 1.8130945719082264e-05, | |
| "loss": 0.5533, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 2.019622641509434, | |
| "grad_norm": 0.45092263682500694, | |
| "learning_rate": 1.8102965864577507e-05, | |
| "loss": 0.5609, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 2.021132075471698, | |
| "grad_norm": 0.37318211497573206, | |
| "learning_rate": 1.8074986010072747e-05, | |
| "loss": 0.5026, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.0226415094339623, | |
| "grad_norm": 0.34400471516145126, | |
| "learning_rate": 1.804700615556799e-05, | |
| "loss": 0.4826, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 2.0241509433962266, | |
| "grad_norm": 0.3500144022243311, | |
| "learning_rate": 1.8019026301063234e-05, | |
| "loss": 0.4663, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 2.0256603773584905, | |
| "grad_norm": 0.34048486473138423, | |
| "learning_rate": 1.799104644655848e-05, | |
| "loss": 0.4835, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 2.027169811320755, | |
| "grad_norm": 0.33541429775955595, | |
| "learning_rate": 1.7963066592053724e-05, | |
| "loss": 0.5427, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 2.0286792452830187, | |
| "grad_norm": 0.3600896985895227, | |
| "learning_rate": 1.7935086737548964e-05, | |
| "loss": 0.516, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 2.030188679245283, | |
| "grad_norm": 0.3429326727185839, | |
| "learning_rate": 1.7907106883044207e-05, | |
| "loss": 0.5144, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 2.0316981132075473, | |
| "grad_norm": 0.31543021450047887, | |
| "learning_rate": 1.7879127028539454e-05, | |
| "loss": 0.5449, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 2.0332075471698112, | |
| "grad_norm": 0.27664831389978795, | |
| "learning_rate": 1.7851147174034697e-05, | |
| "loss": 0.4857, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 2.0347169811320756, | |
| "grad_norm": 0.32371945804571334, | |
| "learning_rate": 1.782316731952994e-05, | |
| "loss": 0.5512, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 2.0362264150943394, | |
| "grad_norm": 0.3123789070192699, | |
| "learning_rate": 1.779518746502518e-05, | |
| "loss": 0.5339, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.0377358490566038, | |
| "grad_norm": 0.3018082555969256, | |
| "learning_rate": 1.7767207610520424e-05, | |
| "loss": 0.5477, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 2.039245283018868, | |
| "grad_norm": 0.330825753840397, | |
| "learning_rate": 1.773922775601567e-05, | |
| "loss": 0.5226, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 2.040754716981132, | |
| "grad_norm": 0.3064556391674686, | |
| "learning_rate": 1.7711247901510914e-05, | |
| "loss": 0.5009, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 2.0422641509433963, | |
| "grad_norm": 0.3279356419954503, | |
| "learning_rate": 1.7683268047006157e-05, | |
| "loss": 0.5007, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 2.04377358490566, | |
| "grad_norm": 0.3562043309709785, | |
| "learning_rate": 1.7655288192501397e-05, | |
| "loss": 0.4928, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 2.0452830188679245, | |
| "grad_norm": 0.2992958064668747, | |
| "learning_rate": 1.762730833799664e-05, | |
| "loss": 0.513, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 2.046792452830189, | |
| "grad_norm": 0.36710409137930194, | |
| "learning_rate": 1.7599328483491888e-05, | |
| "loss": 0.5065, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 2.0483018867924527, | |
| "grad_norm": 0.3134601371847729, | |
| "learning_rate": 1.757134862898713e-05, | |
| "loss": 0.4913, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 2.049811320754717, | |
| "grad_norm": 0.27689129271088303, | |
| "learning_rate": 1.7543368774482374e-05, | |
| "loss": 0.4884, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 2.051320754716981, | |
| "grad_norm": 0.3714023246669663, | |
| "learning_rate": 1.7515388919977618e-05, | |
| "loss": 0.5384, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.0528301886792453, | |
| "grad_norm": 0.2847785796252443, | |
| "learning_rate": 1.748740906547286e-05, | |
| "loss": 0.503, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 2.0543396226415096, | |
| "grad_norm": 0.31030468078337337, | |
| "learning_rate": 1.7459429210968104e-05, | |
| "loss": 0.5054, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 2.0558490566037735, | |
| "grad_norm": 0.2888211469303781, | |
| "learning_rate": 1.7431449356463348e-05, | |
| "loss": 0.5146, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 2.057358490566038, | |
| "grad_norm": 0.3059017261234641, | |
| "learning_rate": 1.740346950195859e-05, | |
| "loss": 0.5527, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 2.0588679245283017, | |
| "grad_norm": 0.2880026611275554, | |
| "learning_rate": 1.7375489647453834e-05, | |
| "loss": 0.5871, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 2.060377358490566, | |
| "grad_norm": 0.3435683903553664, | |
| "learning_rate": 1.7347509792949078e-05, | |
| "loss": 0.4751, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 2.0618867924528304, | |
| "grad_norm": 0.2577466623620189, | |
| "learning_rate": 1.731952993844432e-05, | |
| "loss": 0.5072, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 2.0633962264150942, | |
| "grad_norm": 0.5951239656292413, | |
| "learning_rate": 1.7291550083939565e-05, | |
| "loss": 0.5067, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 2.0649056603773586, | |
| "grad_norm": 0.3091376294357986, | |
| "learning_rate": 1.7263570229434808e-05, | |
| "loss": 0.5628, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 2.0664150943396224, | |
| "grad_norm": 0.3448984710683876, | |
| "learning_rate": 1.723559037493005e-05, | |
| "loss": 0.5315, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.0679245283018868, | |
| "grad_norm": 0.2883682281289665, | |
| "learning_rate": 1.7207610520425295e-05, | |
| "loss": 0.4979, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 2.069433962264151, | |
| "grad_norm": 0.3162892758521297, | |
| "learning_rate": 1.7179630665920538e-05, | |
| "loss": 0.5156, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 2.070943396226415, | |
| "grad_norm": 9.124432005211863, | |
| "learning_rate": 1.715165081141578e-05, | |
| "loss": 0.6299, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 2.0724528301886793, | |
| "grad_norm": 0.3426920377467487, | |
| "learning_rate": 1.7123670956911025e-05, | |
| "loss": 0.5106, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 2.073962264150943, | |
| "grad_norm": 0.3252637771238837, | |
| "learning_rate": 1.7095691102406268e-05, | |
| "loss": 0.465, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 2.0754716981132075, | |
| "grad_norm": 0.27946931242846335, | |
| "learning_rate": 1.706771124790151e-05, | |
| "loss": 0.5466, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 2.076981132075472, | |
| "grad_norm": 0.2950006671302379, | |
| "learning_rate": 1.7039731393396755e-05, | |
| "loss": 0.5233, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 2.0784905660377357, | |
| "grad_norm": 0.3071238734202378, | |
| "learning_rate": 1.7011751538891998e-05, | |
| "loss": 0.4996, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 0.29000161966129556, | |
| "learning_rate": 1.698377168438724e-05, | |
| "loss": 0.5456, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 2.081509433962264, | |
| "grad_norm": 0.3638090048005039, | |
| "learning_rate": 1.6955791829882485e-05, | |
| "loss": 0.5342, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.0830188679245283, | |
| "grad_norm": 0.3221376007630064, | |
| "learning_rate": 1.6927811975377728e-05, | |
| "loss": 0.506, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 2.0845283018867926, | |
| "grad_norm": 0.2882987076412252, | |
| "learning_rate": 1.689983212087297e-05, | |
| "loss": 0.5291, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 2.0860377358490565, | |
| "grad_norm": 0.28696681372347427, | |
| "learning_rate": 1.6871852266368215e-05, | |
| "loss": 0.4838, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 2.087547169811321, | |
| "grad_norm": 0.38854977880255626, | |
| "learning_rate": 1.684387241186346e-05, | |
| "loss": 0.5473, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 2.0890566037735847, | |
| "grad_norm": 0.29092276215854995, | |
| "learning_rate": 1.6815892557358702e-05, | |
| "loss": 0.4947, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 2.090566037735849, | |
| "grad_norm": 0.3087900936237078, | |
| "learning_rate": 1.678791270285395e-05, | |
| "loss": 0.4894, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 2.0920754716981134, | |
| "grad_norm": 0.3447103919070566, | |
| "learning_rate": 1.675993284834919e-05, | |
| "loss": 0.4929, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 2.0935849056603772, | |
| "grad_norm": 0.2823804807438117, | |
| "learning_rate": 1.6731952993844432e-05, | |
| "loss": 0.5266, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.0950943396226416, | |
| "grad_norm": 2.227830939004405, | |
| "learning_rate": 1.6703973139339675e-05, | |
| "loss": 0.4667, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 2.0966037735849055, | |
| "grad_norm": 0.36027609216088624, | |
| "learning_rate": 1.6675993284834922e-05, | |
| "loss": 0.5089, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.09811320754717, | |
| "grad_norm": 0.30939324253732786, | |
| "learning_rate": 1.6648013430330165e-05, | |
| "loss": 0.509, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 2.099622641509434, | |
| "grad_norm": 0.2940056905558936, | |
| "learning_rate": 1.6620033575825405e-05, | |
| "loss": 0.5088, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 2.101132075471698, | |
| "grad_norm": 0.2906000756858416, | |
| "learning_rate": 1.659205372132065e-05, | |
| "loss": 0.496, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 2.1026415094339623, | |
| "grad_norm": 0.2869459547310367, | |
| "learning_rate": 1.6564073866815892e-05, | |
| "loss": 0.5428, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 2.104150943396226, | |
| "grad_norm": 0.2999230365705766, | |
| "learning_rate": 1.653609401231114e-05, | |
| "loss": 0.4922, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 2.1056603773584905, | |
| "grad_norm": 0.2962964554482559, | |
| "learning_rate": 1.6508114157806382e-05, | |
| "loss": 0.507, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 2.107169811320755, | |
| "grad_norm": 0.2795439894392488, | |
| "learning_rate": 1.6480134303301622e-05, | |
| "loss": 0.5061, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 2.1086792452830188, | |
| "grad_norm": 0.31442101897336405, | |
| "learning_rate": 1.6452154448796865e-05, | |
| "loss": 0.5429, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 2.110188679245283, | |
| "grad_norm": 0.29732567961373857, | |
| "learning_rate": 1.642417459429211e-05, | |
| "loss": 0.5197, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 2.111698113207547, | |
| "grad_norm": 0.2869692773071042, | |
| "learning_rate": 1.6396194739787356e-05, | |
| "loss": 0.5009, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.1132075471698113, | |
| "grad_norm": 0.31504920038578266, | |
| "learning_rate": 1.63682148852826e-05, | |
| "loss": 0.5181, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 2.1147169811320756, | |
| "grad_norm": 0.31546568417414367, | |
| "learning_rate": 1.634023503077784e-05, | |
| "loss": 0.4921, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 2.1162264150943395, | |
| "grad_norm": 0.5023731953716996, | |
| "learning_rate": 1.6312255176273082e-05, | |
| "loss": 0.5294, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 2.117735849056604, | |
| "grad_norm": 0.2997062635797708, | |
| "learning_rate": 1.628427532176833e-05, | |
| "loss": 0.5035, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 2.1192452830188677, | |
| "grad_norm": 6.197559051784822, | |
| "learning_rate": 1.6256295467263572e-05, | |
| "loss": 1.1389, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 2.120754716981132, | |
| "grad_norm": 0.37051838525151753, | |
| "learning_rate": 1.6228315612758816e-05, | |
| "loss": 0.4735, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 2.1222641509433964, | |
| "grad_norm": 0.34610400457497426, | |
| "learning_rate": 1.6200335758254056e-05, | |
| "loss": 0.5028, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 2.1237735849056603, | |
| "grad_norm": 0.29036868542009187, | |
| "learning_rate": 1.61723559037493e-05, | |
| "loss": 0.5034, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 2.1252830188679246, | |
| "grad_norm": 0.2800323997242073, | |
| "learning_rate": 1.6144376049244546e-05, | |
| "loss": 0.502, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 2.1267924528301885, | |
| "grad_norm": 0.38872365691156985, | |
| "learning_rate": 1.611639619473979e-05, | |
| "loss": 0.5192, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.128301886792453, | |
| "grad_norm": 0.3370959225184659, | |
| "learning_rate": 1.6088416340235033e-05, | |
| "loss": 0.6035, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 2.129811320754717, | |
| "grad_norm": 0.27435668970687543, | |
| "learning_rate": 1.6060436485730273e-05, | |
| "loss": 0.51, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 2.131320754716981, | |
| "grad_norm": 1.7820096334751305, | |
| "learning_rate": 1.6032456631225516e-05, | |
| "loss": 0.5151, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 2.1328301886792453, | |
| "grad_norm": 0.383674413890331, | |
| "learning_rate": 1.6004476776720763e-05, | |
| "loss": 0.5579, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 2.1343396226415092, | |
| "grad_norm": 0.328537968912913, | |
| "learning_rate": 1.5976496922216006e-05, | |
| "loss": 0.495, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 2.1358490566037736, | |
| "grad_norm": 0.3243298723276784, | |
| "learning_rate": 1.594851706771125e-05, | |
| "loss": 0.5264, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 2.137358490566038, | |
| "grad_norm": 0.32700388956318677, | |
| "learning_rate": 1.5920537213206493e-05, | |
| "loss": 0.4996, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 2.1388679245283018, | |
| "grad_norm": 0.3322839065657285, | |
| "learning_rate": 1.5892557358701736e-05, | |
| "loss": 0.5223, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 2.140377358490566, | |
| "grad_norm": 0.3004552967389372, | |
| "learning_rate": 1.586457750419698e-05, | |
| "loss": 0.5064, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 2.14188679245283, | |
| "grad_norm": 0.5763848814349466, | |
| "learning_rate": 1.5836597649692223e-05, | |
| "loss": 0.5033, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.1433962264150943, | |
| "grad_norm": 0.32960695702396287, | |
| "learning_rate": 1.5808617795187466e-05, | |
| "loss": 0.5373, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 2.1449056603773586, | |
| "grad_norm": 0.3107640748412959, | |
| "learning_rate": 1.578063794068271e-05, | |
| "loss": 0.5038, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 2.1464150943396225, | |
| "grad_norm": 0.30299740916002305, | |
| "learning_rate": 1.5752658086177953e-05, | |
| "loss": 0.4905, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 2.147924528301887, | |
| "grad_norm": 0.3175005717166028, | |
| "learning_rate": 1.5724678231673196e-05, | |
| "loss": 0.5247, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 2.149433962264151, | |
| "grad_norm": 0.38504686240959507, | |
| "learning_rate": 1.569669837716844e-05, | |
| "loss": 0.4848, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 2.150943396226415, | |
| "grad_norm": 0.29861363410964187, | |
| "learning_rate": 1.5668718522663683e-05, | |
| "loss": 0.5272, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 2.1524528301886794, | |
| "grad_norm": 0.3048000067062521, | |
| "learning_rate": 1.5640738668158926e-05, | |
| "loss": 0.523, | |
| "step": 1427 | |
| }, | |
| { | |
| "epoch": 2.1539622641509433, | |
| "grad_norm": 0.29199245961301584, | |
| "learning_rate": 1.561275881365417e-05, | |
| "loss": 0.4737, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 2.1554716981132076, | |
| "grad_norm": 0.324024535729314, | |
| "learning_rate": 1.5584778959149413e-05, | |
| "loss": 0.5564, | |
| "step": 1429 | |
| }, | |
| { | |
| "epoch": 2.1569811320754715, | |
| "grad_norm": 0.2706042276628025, | |
| "learning_rate": 1.5556799104644656e-05, | |
| "loss": 0.4991, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.158490566037736, | |
| "grad_norm": 0.31292251156779505, | |
| "learning_rate": 1.55288192501399e-05, | |
| "loss": 0.5495, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.28435967603261125, | |
| "learning_rate": 1.5500839395635143e-05, | |
| "loss": 0.5318, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 2.161509433962264, | |
| "grad_norm": 0.28290949197621923, | |
| "learning_rate": 1.5472859541130387e-05, | |
| "loss": 0.475, | |
| "step": 1433 | |
| }, | |
| { | |
| "epoch": 2.1630188679245284, | |
| "grad_norm": 0.27419716811272765, | |
| "learning_rate": 1.544487968662563e-05, | |
| "loss": 0.512, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 2.1645283018867927, | |
| "grad_norm": 0.33229780717562507, | |
| "learning_rate": 1.5416899832120873e-05, | |
| "loss": 0.5253, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 2.1660377358490566, | |
| "grad_norm": 0.2635435711478846, | |
| "learning_rate": 1.5388919977616117e-05, | |
| "loss": 0.4922, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 2.167547169811321, | |
| "grad_norm": 0.3354287803738388, | |
| "learning_rate": 1.536094012311136e-05, | |
| "loss": 0.5607, | |
| "step": 1437 | |
| }, | |
| { | |
| "epoch": 2.169056603773585, | |
| "grad_norm": 0.2982433296429279, | |
| "learning_rate": 1.5332960268606603e-05, | |
| "loss": 0.53, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 2.170566037735849, | |
| "grad_norm": 0.2744678686241482, | |
| "learning_rate": 1.5304980414101847e-05, | |
| "loss": 0.4821, | |
| "step": 1439 | |
| }, | |
| { | |
| "epoch": 2.172075471698113, | |
| "grad_norm": 0.30144942338602565, | |
| "learning_rate": 1.527700055959709e-05, | |
| "loss": 0.5545, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.1735849056603773, | |
| "grad_norm": 0.2890165107307089, | |
| "learning_rate": 1.5249020705092335e-05, | |
| "loss": 0.4918, | |
| "step": 1441 | |
| }, | |
| { | |
| "epoch": 2.1750943396226416, | |
| "grad_norm": 0.27397501117313733, | |
| "learning_rate": 1.5221040850587578e-05, | |
| "loss": 0.5342, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 2.1766037735849055, | |
| "grad_norm": 0.2948193831715891, | |
| "learning_rate": 1.5193060996082822e-05, | |
| "loss": 0.493, | |
| "step": 1443 | |
| }, | |
| { | |
| "epoch": 2.17811320754717, | |
| "grad_norm": 0.31953210199754906, | |
| "learning_rate": 1.5165081141578064e-05, | |
| "loss": 0.5324, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 2.179622641509434, | |
| "grad_norm": 0.34514765305750156, | |
| "learning_rate": 1.5137101287073307e-05, | |
| "loss": 0.4684, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 2.181132075471698, | |
| "grad_norm": 0.30888012716661545, | |
| "learning_rate": 1.5109121432568552e-05, | |
| "loss": 0.568, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 2.1826415094339624, | |
| "grad_norm": 0.31339189400441597, | |
| "learning_rate": 1.5081141578063795e-05, | |
| "loss": 0.5351, | |
| "step": 1447 | |
| }, | |
| { | |
| "epoch": 2.1841509433962263, | |
| "grad_norm": 0.2850461935374051, | |
| "learning_rate": 1.5053161723559039e-05, | |
| "loss": 0.4853, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 2.1856603773584906, | |
| "grad_norm": 0.2827499266089179, | |
| "learning_rate": 1.502518186905428e-05, | |
| "loss": 0.5234, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 2.1871698113207545, | |
| "grad_norm": 0.3015886512697336, | |
| "learning_rate": 1.4997202014549525e-05, | |
| "loss": 0.5494, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.188679245283019, | |
| "grad_norm": 0.2849817847705387, | |
| "learning_rate": 1.4969222160044769e-05, | |
| "loss": 0.5304, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 2.190188679245283, | |
| "grad_norm": 0.40838242894053084, | |
| "learning_rate": 1.4941242305540012e-05, | |
| "loss": 0.5282, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 2.191698113207547, | |
| "grad_norm": 0.3487341672894736, | |
| "learning_rate": 1.4913262451035257e-05, | |
| "loss": 0.5272, | |
| "step": 1453 | |
| }, | |
| { | |
| "epoch": 2.1932075471698114, | |
| "grad_norm": 0.28796501873449454, | |
| "learning_rate": 1.4885282596530497e-05, | |
| "loss": 0.4951, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 2.1947169811320757, | |
| "grad_norm": 0.2684130709861942, | |
| "learning_rate": 1.4857302742025742e-05, | |
| "loss": 0.4948, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 2.1962264150943396, | |
| "grad_norm": 0.27600668574894804, | |
| "learning_rate": 1.4829322887520986e-05, | |
| "loss": 0.4908, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 2.197735849056604, | |
| "grad_norm": 0.2588348566679071, | |
| "learning_rate": 1.4801343033016229e-05, | |
| "loss": 0.491, | |
| "step": 1457 | |
| }, | |
| { | |
| "epoch": 2.199245283018868, | |
| "grad_norm": 0.2987400337756697, | |
| "learning_rate": 1.4773363178511474e-05, | |
| "loss": 0.5446, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 2.200754716981132, | |
| "grad_norm": 0.25621718687856143, | |
| "learning_rate": 1.4745383324006714e-05, | |
| "loss": 0.4764, | |
| "step": 1459 | |
| }, | |
| { | |
| "epoch": 2.202264150943396, | |
| "grad_norm": 0.7981297027382572, | |
| "learning_rate": 1.4717403469501959e-05, | |
| "loss": 0.4999, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.2037735849056603, | |
| "grad_norm": 0.2965666251899607, | |
| "learning_rate": 1.4689423614997202e-05, | |
| "loss": 0.535, | |
| "step": 1461 | |
| }, | |
| { | |
| "epoch": 2.2052830188679247, | |
| "grad_norm": 0.2804521739277098, | |
| "learning_rate": 1.4661443760492446e-05, | |
| "loss": 0.4918, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 2.2067924528301885, | |
| "grad_norm": 0.276142140919494, | |
| "learning_rate": 1.463346390598769e-05, | |
| "loss": 0.4702, | |
| "step": 1463 | |
| }, | |
| { | |
| "epoch": 2.208301886792453, | |
| "grad_norm": 0.2820984265166286, | |
| "learning_rate": 1.4605484051482932e-05, | |
| "loss": 0.4939, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 2.209811320754717, | |
| "grad_norm": 0.7192982932338521, | |
| "learning_rate": 1.4577504196978176e-05, | |
| "loss": 0.5609, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 2.211320754716981, | |
| "grad_norm": 0.2937186576584232, | |
| "learning_rate": 1.454952434247342e-05, | |
| "loss": 0.5353, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 2.2128301886792454, | |
| "grad_norm": 0.31277570600962407, | |
| "learning_rate": 1.4521544487968664e-05, | |
| "loss": 0.4838, | |
| "step": 1467 | |
| }, | |
| { | |
| "epoch": 2.2143396226415093, | |
| "grad_norm": 0.3055256840051491, | |
| "learning_rate": 1.4493564633463908e-05, | |
| "loss": 0.5433, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 2.2158490566037736, | |
| "grad_norm": 0.31560711080139303, | |
| "learning_rate": 1.4465584778959151e-05, | |
| "loss": 0.5337, | |
| "step": 1469 | |
| }, | |
| { | |
| "epoch": 2.2173584905660375, | |
| "grad_norm": 0.29251628786569595, | |
| "learning_rate": 1.4437604924454393e-05, | |
| "loss": 0.5144, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.218867924528302, | |
| "grad_norm": 0.3049591268418919, | |
| "learning_rate": 1.4409625069949636e-05, | |
| "loss": 0.5322, | |
| "step": 1471 | |
| }, | |
| { | |
| "epoch": 2.220377358490566, | |
| "grad_norm": 0.3094888112301788, | |
| "learning_rate": 1.4381645215444881e-05, | |
| "loss": 0.5531, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 2.22188679245283, | |
| "grad_norm": 0.272447846375912, | |
| "learning_rate": 1.4353665360940124e-05, | |
| "loss": 0.4913, | |
| "step": 1473 | |
| }, | |
| { | |
| "epoch": 2.2233962264150944, | |
| "grad_norm": 0.3123501475624154, | |
| "learning_rate": 1.4325685506435368e-05, | |
| "loss": 0.5543, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 2.2249056603773587, | |
| "grad_norm": 0.272171738262368, | |
| "learning_rate": 1.429770565193061e-05, | |
| "loss": 0.5062, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 2.2264150943396226, | |
| "grad_norm": 0.34071458995993736, | |
| "learning_rate": 1.4269725797425853e-05, | |
| "loss": 0.4612, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 2.227924528301887, | |
| "grad_norm": 0.3207863817121142, | |
| "learning_rate": 1.4241745942921098e-05, | |
| "loss": 0.5399, | |
| "step": 1477 | |
| }, | |
| { | |
| "epoch": 2.229433962264151, | |
| "grad_norm": 0.3312395334585876, | |
| "learning_rate": 1.4213766088416341e-05, | |
| "loss": 0.5551, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 2.230943396226415, | |
| "grad_norm": 0.3078119559299569, | |
| "learning_rate": 1.4185786233911586e-05, | |
| "loss": 0.5399, | |
| "step": 1479 | |
| }, | |
| { | |
| "epoch": 2.232452830188679, | |
| "grad_norm": 0.3677416007012839, | |
| "learning_rate": 1.4157806379406826e-05, | |
| "loss": 0.4692, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.2339622641509433, | |
| "grad_norm": 0.29294788473317285, | |
| "learning_rate": 1.4129826524902071e-05, | |
| "loss": 0.5189, | |
| "step": 1481 | |
| }, | |
| { | |
| "epoch": 2.2354716981132077, | |
| "grad_norm": 0.2967231058765099, | |
| "learning_rate": 1.4101846670397315e-05, | |
| "loss": 0.4883, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 2.2369811320754716, | |
| "grad_norm": 0.28070084795251554, | |
| "learning_rate": 1.4073866815892558e-05, | |
| "loss": 0.5129, | |
| "step": 1483 | |
| }, | |
| { | |
| "epoch": 2.238490566037736, | |
| "grad_norm": 0.27025440257233807, | |
| "learning_rate": 1.4045886961387803e-05, | |
| "loss": 0.5129, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 0.3278016709506503, | |
| "learning_rate": 1.4017907106883043e-05, | |
| "loss": 0.5552, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 2.241509433962264, | |
| "grad_norm": 0.2677486192645805, | |
| "learning_rate": 1.3989927252378288e-05, | |
| "loss": 0.505, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 2.2430188679245284, | |
| "grad_norm": 0.27640060062320465, | |
| "learning_rate": 1.3961947397873532e-05, | |
| "loss": 0.5228, | |
| "step": 1487 | |
| }, | |
| { | |
| "epoch": 2.2445283018867923, | |
| "grad_norm": 0.32687355578285887, | |
| "learning_rate": 1.3933967543368775e-05, | |
| "loss": 0.5329, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 2.2460377358490566, | |
| "grad_norm": 0.28290727880482414, | |
| "learning_rate": 1.390598768886402e-05, | |
| "loss": 0.5653, | |
| "step": 1489 | |
| }, | |
| { | |
| "epoch": 2.2475471698113205, | |
| "grad_norm": 0.2945089980775346, | |
| "learning_rate": 1.387800783435926e-05, | |
| "loss": 0.5558, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.249056603773585, | |
| "grad_norm": 0.24883863133395934, | |
| "learning_rate": 1.3850027979854505e-05, | |
| "loss": 0.4777, | |
| "step": 1491 | |
| }, | |
| { | |
| "epoch": 2.250566037735849, | |
| "grad_norm": 0.2864405466136228, | |
| "learning_rate": 1.3822048125349748e-05, | |
| "loss": 0.5316, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 2.252075471698113, | |
| "grad_norm": 0.29305064929033103, | |
| "learning_rate": 1.3794068270844993e-05, | |
| "loss": 0.5115, | |
| "step": 1493 | |
| }, | |
| { | |
| "epoch": 2.2535849056603774, | |
| "grad_norm": 0.2657764273856441, | |
| "learning_rate": 1.3766088416340237e-05, | |
| "loss": 0.496, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 2.2550943396226417, | |
| "grad_norm": 0.29966606901721604, | |
| "learning_rate": 1.373810856183548e-05, | |
| "loss": 0.5369, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 2.2566037735849056, | |
| "grad_norm": 0.32608189723105613, | |
| "learning_rate": 1.3710128707330722e-05, | |
| "loss": 0.5373, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 2.25811320754717, | |
| "grad_norm": 0.2698321482337212, | |
| "learning_rate": 1.3682148852825965e-05, | |
| "loss": 0.5642, | |
| "step": 1497 | |
| }, | |
| { | |
| "epoch": 2.259622641509434, | |
| "grad_norm": 0.2862557953990325, | |
| "learning_rate": 1.365416899832121e-05, | |
| "loss": 0.5419, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 2.261132075471698, | |
| "grad_norm": 0.2610202337731504, | |
| "learning_rate": 1.3626189143816454e-05, | |
| "loss": 0.4617, | |
| "step": 1499 | |
| }, | |
| { | |
| "epoch": 2.262641509433962, | |
| "grad_norm": 0.2756743186352737, | |
| "learning_rate": 1.3598209289311697e-05, | |
| "loss": 0.5105, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.2641509433962264, | |
| "grad_norm": 0.318550644447325, | |
| "learning_rate": 1.3570229434806939e-05, | |
| "loss": 0.5459, | |
| "step": 1501 | |
| }, | |
| { | |
| "epoch": 2.2656603773584907, | |
| "grad_norm": 0.27546152249243994, | |
| "learning_rate": 1.3542249580302182e-05, | |
| "loss": 0.5206, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 2.2671698113207546, | |
| "grad_norm": 0.29381799157195004, | |
| "learning_rate": 1.3514269725797427e-05, | |
| "loss": 0.4987, | |
| "step": 1503 | |
| }, | |
| { | |
| "epoch": 2.268679245283019, | |
| "grad_norm": 0.295754784857654, | |
| "learning_rate": 1.348628987129267e-05, | |
| "loss": 0.5064, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 2.2701886792452832, | |
| "grad_norm": 0.2918834331491631, | |
| "learning_rate": 1.3458310016787914e-05, | |
| "loss": 0.571, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 2.271698113207547, | |
| "grad_norm": 0.256431144636905, | |
| "learning_rate": 1.3430330162283155e-05, | |
| "loss": 0.4934, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 2.2732075471698114, | |
| "grad_norm": 0.2836824188188069, | |
| "learning_rate": 1.34023503077784e-05, | |
| "loss": 0.5082, | |
| "step": 1507 | |
| }, | |
| { | |
| "epoch": 2.2747169811320753, | |
| "grad_norm": 0.29318207704716076, | |
| "learning_rate": 1.3374370453273644e-05, | |
| "loss": 0.5, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 2.2762264150943397, | |
| "grad_norm": 0.271941956506678, | |
| "learning_rate": 1.3346390598768887e-05, | |
| "loss": 0.5325, | |
| "step": 1509 | |
| }, | |
| { | |
| "epoch": 2.2777358490566035, | |
| "grad_norm": 0.2855337211319555, | |
| "learning_rate": 1.3318410744264132e-05, | |
| "loss": 0.4959, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.279245283018868, | |
| "grad_norm": 0.2949051630826, | |
| "learning_rate": 1.3290430889759372e-05, | |
| "loss": 0.5202, | |
| "step": 1511 | |
| }, | |
| { | |
| "epoch": 2.280754716981132, | |
| "grad_norm": 0.27050450657637826, | |
| "learning_rate": 1.3262451035254617e-05, | |
| "loss": 0.5023, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 2.282264150943396, | |
| "grad_norm": 0.291946921230413, | |
| "learning_rate": 1.323447118074986e-05, | |
| "loss": 0.5177, | |
| "step": 1513 | |
| }, | |
| { | |
| "epoch": 2.2837735849056604, | |
| "grad_norm": 0.27140194708974613, | |
| "learning_rate": 1.3206491326245104e-05, | |
| "loss": 0.4802, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 2.2852830188679247, | |
| "grad_norm": 0.28632708768258713, | |
| "learning_rate": 1.3178511471740349e-05, | |
| "loss": 0.5031, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 2.2867924528301886, | |
| "grad_norm": 0.2653378913781436, | |
| "learning_rate": 1.3150531617235589e-05, | |
| "loss": 0.5348, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 2.288301886792453, | |
| "grad_norm": 0.3416254176656584, | |
| "learning_rate": 1.3122551762730834e-05, | |
| "loss": 0.5115, | |
| "step": 1517 | |
| }, | |
| { | |
| "epoch": 2.289811320754717, | |
| "grad_norm": 0.3132158552150094, | |
| "learning_rate": 1.3094571908226077e-05, | |
| "loss": 0.5507, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 2.291320754716981, | |
| "grad_norm": 0.2587415321407086, | |
| "learning_rate": 1.306659205372132e-05, | |
| "loss": 0.471, | |
| "step": 1519 | |
| }, | |
| { | |
| "epoch": 2.292830188679245, | |
| "grad_norm": 0.2813684595304604, | |
| "learning_rate": 1.3038612199216566e-05, | |
| "loss": 0.5033, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.2943396226415094, | |
| "grad_norm": 0.34086813878117894, | |
| "learning_rate": 1.301063234471181e-05, | |
| "loss": 0.5173, | |
| "step": 1521 | |
| }, | |
| { | |
| "epoch": 2.2958490566037737, | |
| "grad_norm": 0.29897003760539104, | |
| "learning_rate": 1.2982652490207051e-05, | |
| "loss": 0.4927, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 2.2973584905660376, | |
| "grad_norm": 0.3875603639423126, | |
| "learning_rate": 1.2954672635702294e-05, | |
| "loss": 0.5581, | |
| "step": 1523 | |
| }, | |
| { | |
| "epoch": 2.298867924528302, | |
| "grad_norm": 0.2862074867727448, | |
| "learning_rate": 1.292669278119754e-05, | |
| "loss": 0.4862, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 2.3003773584905662, | |
| "grad_norm": 0.30868691166763873, | |
| "learning_rate": 1.2898712926692783e-05, | |
| "loss": 0.5272, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 2.30188679245283, | |
| "grad_norm": 0.29790893971786336, | |
| "learning_rate": 1.2870733072188026e-05, | |
| "loss": 0.5266, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 2.3033962264150944, | |
| "grad_norm": 0.2782090398414401, | |
| "learning_rate": 1.2842753217683268e-05, | |
| "loss": 0.5462, | |
| "step": 1527 | |
| }, | |
| { | |
| "epoch": 2.3049056603773583, | |
| "grad_norm": 0.2854745590256956, | |
| "learning_rate": 1.2814773363178511e-05, | |
| "loss": 0.4805, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 2.3064150943396227, | |
| "grad_norm": 0.3113454727191614, | |
| "learning_rate": 1.2786793508673756e-05, | |
| "loss": 0.4902, | |
| "step": 1529 | |
| }, | |
| { | |
| "epoch": 2.3079245283018865, | |
| "grad_norm": 0.25928474454077677, | |
| "learning_rate": 1.2758813654169e-05, | |
| "loss": 0.4906, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.309433962264151, | |
| "grad_norm": 0.26698105372178027, | |
| "learning_rate": 1.2730833799664243e-05, | |
| "loss": 0.5508, | |
| "step": 1531 | |
| }, | |
| { | |
| "epoch": 2.310943396226415, | |
| "grad_norm": 0.26530252126413506, | |
| "learning_rate": 1.2702853945159485e-05, | |
| "loss": 0.5375, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 2.312452830188679, | |
| "grad_norm": 0.32824150490138687, | |
| "learning_rate": 1.2674874090654728e-05, | |
| "loss": 0.5316, | |
| "step": 1533 | |
| }, | |
| { | |
| "epoch": 2.3139622641509434, | |
| "grad_norm": 0.2616604148241911, | |
| "learning_rate": 1.2646894236149973e-05, | |
| "loss": 0.4693, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 2.3154716981132077, | |
| "grad_norm": 0.2937527365395457, | |
| "learning_rate": 1.2618914381645216e-05, | |
| "loss": 0.5329, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 2.3169811320754716, | |
| "grad_norm": 0.28549646821620334, | |
| "learning_rate": 1.259093452714046e-05, | |
| "loss": 0.5417, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 2.318490566037736, | |
| "grad_norm": 0.29605947776580616, | |
| "learning_rate": 1.2562954672635701e-05, | |
| "loss": 0.5188, | |
| "step": 1537 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 0.28290540315403917, | |
| "learning_rate": 1.2534974818130946e-05, | |
| "loss": 0.4817, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 2.321509433962264, | |
| "grad_norm": 0.28346799307216214, | |
| "learning_rate": 1.250699496362619e-05, | |
| "loss": 0.5291, | |
| "step": 1539 | |
| }, | |
| { | |
| "epoch": 2.323018867924528, | |
| "grad_norm": 0.3025578648182897, | |
| "learning_rate": 1.2479015109121433e-05, | |
| "loss": 0.5549, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.3245283018867924, | |
| "grad_norm": 0.28767635571955014, | |
| "learning_rate": 1.2451035254616676e-05, | |
| "loss": 0.5334, | |
| "step": 1541 | |
| }, | |
| { | |
| "epoch": 2.3260377358490567, | |
| "grad_norm": 0.31251235625172796, | |
| "learning_rate": 1.242305540011192e-05, | |
| "loss": 0.5222, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 2.3275471698113206, | |
| "grad_norm": 0.3280951350127823, | |
| "learning_rate": 1.2395075545607163e-05, | |
| "loss": 0.5397, | |
| "step": 1543 | |
| }, | |
| { | |
| "epoch": 2.329056603773585, | |
| "grad_norm": 0.313854810082151, | |
| "learning_rate": 1.2367095691102407e-05, | |
| "loss": 0.5483, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 2.3305660377358492, | |
| "grad_norm": 0.2670608264932217, | |
| "learning_rate": 1.233911583659765e-05, | |
| "loss": 0.5254, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 2.332075471698113, | |
| "grad_norm": 0.2719825460240819, | |
| "learning_rate": 1.2311135982092893e-05, | |
| "loss": 0.5459, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 2.3335849056603775, | |
| "grad_norm": 0.31088912379982053, | |
| "learning_rate": 1.2283156127588137e-05, | |
| "loss": 0.5487, | |
| "step": 1547 | |
| }, | |
| { | |
| "epoch": 2.3350943396226413, | |
| "grad_norm": 0.27061842798171537, | |
| "learning_rate": 1.2255176273083382e-05, | |
| "loss": 0.4935, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 2.3366037735849057, | |
| "grad_norm": 0.2831678674322194, | |
| "learning_rate": 1.2227196418578623e-05, | |
| "loss": 0.5351, | |
| "step": 1549 | |
| }, | |
| { | |
| "epoch": 2.33811320754717, | |
| "grad_norm": 0.49601884749761044, | |
| "learning_rate": 1.2199216564073867e-05, | |
| "loss": 0.5137, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.339622641509434, | |
| "grad_norm": 0.27586411056769244, | |
| "learning_rate": 1.217123670956911e-05, | |
| "loss": 0.5412, | |
| "step": 1551 | |
| }, | |
| { | |
| "epoch": 2.341132075471698, | |
| "grad_norm": 0.29114042947531105, | |
| "learning_rate": 1.2143256855064353e-05, | |
| "loss": 0.5458, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 2.342641509433962, | |
| "grad_norm": 0.2689747977811522, | |
| "learning_rate": 1.2115277000559599e-05, | |
| "loss": 0.5441, | |
| "step": 1553 | |
| }, | |
| { | |
| "epoch": 2.3441509433962264, | |
| "grad_norm": 0.261267562119711, | |
| "learning_rate": 1.208729714605484e-05, | |
| "loss": 0.4996, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 2.3456603773584908, | |
| "grad_norm": 0.2630661794726856, | |
| "learning_rate": 1.2059317291550085e-05, | |
| "loss": 0.4945, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 2.3471698113207546, | |
| "grad_norm": 0.2714886371024407, | |
| "learning_rate": 1.2031337437045329e-05, | |
| "loss": 0.5419, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 2.348679245283019, | |
| "grad_norm": 0.27322996123705456, | |
| "learning_rate": 1.200335758254057e-05, | |
| "loss": 0.5344, | |
| "step": 1557 | |
| }, | |
| { | |
| "epoch": 2.350188679245283, | |
| "grad_norm": 0.27446564225924486, | |
| "learning_rate": 1.1975377728035815e-05, | |
| "loss": 0.4914, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 2.351698113207547, | |
| "grad_norm": 0.2786018549729709, | |
| "learning_rate": 1.1947397873531057e-05, | |
| "loss": 0.5524, | |
| "step": 1559 | |
| }, | |
| { | |
| "epoch": 2.3532075471698115, | |
| "grad_norm": 0.2744379654868362, | |
| "learning_rate": 1.1919418019026302e-05, | |
| "loss": 0.5057, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.3547169811320754, | |
| "grad_norm": 0.29383230395925714, | |
| "learning_rate": 1.1891438164521545e-05, | |
| "loss": 0.4995, | |
| "step": 1561 | |
| }, | |
| { | |
| "epoch": 2.3562264150943397, | |
| "grad_norm": 0.27567156486489924, | |
| "learning_rate": 1.1863458310016789e-05, | |
| "loss": 0.5059, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 2.3577358490566036, | |
| "grad_norm": 0.2965755545135667, | |
| "learning_rate": 1.1835478455512032e-05, | |
| "loss": 0.5183, | |
| "step": 1563 | |
| }, | |
| { | |
| "epoch": 2.359245283018868, | |
| "grad_norm": 0.2942882557087409, | |
| "learning_rate": 1.1807498601007276e-05, | |
| "loss": 0.4978, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 2.3607547169811323, | |
| "grad_norm": 0.3612767923459085, | |
| "learning_rate": 1.1779518746502519e-05, | |
| "loss": 0.5482, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 2.362264150943396, | |
| "grad_norm": 0.37068931043688397, | |
| "learning_rate": 1.1751538891997762e-05, | |
| "loss": 0.5417, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 2.3637735849056605, | |
| "grad_norm": 0.3009994319645029, | |
| "learning_rate": 1.1723559037493006e-05, | |
| "loss": 0.5241, | |
| "step": 1567 | |
| }, | |
| { | |
| "epoch": 2.3652830188679244, | |
| "grad_norm": 0.2982051870389813, | |
| "learning_rate": 1.1695579182988249e-05, | |
| "loss": 0.5138, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 2.3667924528301887, | |
| "grad_norm": 0.273290340770851, | |
| "learning_rate": 1.1667599328483492e-05, | |
| "loss": 0.5425, | |
| "step": 1569 | |
| }, | |
| { | |
| "epoch": 2.368301886792453, | |
| "grad_norm": 0.30521472956958234, | |
| "learning_rate": 1.1639619473978736e-05, | |
| "loss": 0.503, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.369811320754717, | |
| "grad_norm": 0.2733183011713467, | |
| "learning_rate": 1.1611639619473979e-05, | |
| "loss": 0.4991, | |
| "step": 1571 | |
| }, | |
| { | |
| "epoch": 2.3713207547169812, | |
| "grad_norm": 0.2700309326901011, | |
| "learning_rate": 1.1583659764969222e-05, | |
| "loss": 0.4871, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 2.372830188679245, | |
| "grad_norm": 0.3165698192789348, | |
| "learning_rate": 1.1555679910464466e-05, | |
| "loss": 0.5259, | |
| "step": 1573 | |
| }, | |
| { | |
| "epoch": 2.3743396226415094, | |
| "grad_norm": 0.32213328390083884, | |
| "learning_rate": 1.152770005595971e-05, | |
| "loss": 0.4876, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 2.3758490566037738, | |
| "grad_norm": 0.3212920255017883, | |
| "learning_rate": 1.1499720201454953e-05, | |
| "loss": 0.5213, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 2.3773584905660377, | |
| "grad_norm": 0.28550611630396816, | |
| "learning_rate": 1.1471740346950196e-05, | |
| "loss": 0.5245, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 2.378867924528302, | |
| "grad_norm": 0.33575387480012625, | |
| "learning_rate": 1.144376049244544e-05, | |
| "loss": 0.5325, | |
| "step": 1577 | |
| }, | |
| { | |
| "epoch": 2.380377358490566, | |
| "grad_norm": 0.3209791568084635, | |
| "learning_rate": 1.1415780637940683e-05, | |
| "loss": 0.5203, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 2.38188679245283, | |
| "grad_norm": 0.3068339238371063, | |
| "learning_rate": 1.1387800783435928e-05, | |
| "loss": 0.5432, | |
| "step": 1579 | |
| }, | |
| { | |
| "epoch": 2.3833962264150945, | |
| "grad_norm": 0.29288109611440577, | |
| "learning_rate": 1.135982092893117e-05, | |
| "loss": 0.4908, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.3849056603773584, | |
| "grad_norm": 0.2823808739473944, | |
| "learning_rate": 1.1331841074426414e-05, | |
| "loss": 0.5247, | |
| "step": 1581 | |
| }, | |
| { | |
| "epoch": 2.3864150943396227, | |
| "grad_norm": 0.2970923159171053, | |
| "learning_rate": 1.1303861219921658e-05, | |
| "loss": 0.5034, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 2.3879245283018866, | |
| "grad_norm": 0.2811796099376647, | |
| "learning_rate": 1.12758813654169e-05, | |
| "loss": 0.513, | |
| "step": 1583 | |
| }, | |
| { | |
| "epoch": 2.389433962264151, | |
| "grad_norm": 0.28383899311229177, | |
| "learning_rate": 1.1247901510912144e-05, | |
| "loss": 0.4913, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 2.3909433962264153, | |
| "grad_norm": 0.2965229723431742, | |
| "learning_rate": 1.1219921656407386e-05, | |
| "loss": 0.5565, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 2.392452830188679, | |
| "grad_norm": 0.28613474578656095, | |
| "learning_rate": 1.1191941801902631e-05, | |
| "loss": 0.5035, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 2.3939622641509435, | |
| "grad_norm": 0.333638319152662, | |
| "learning_rate": 1.1163961947397875e-05, | |
| "loss": 0.5502, | |
| "step": 1587 | |
| }, | |
| { | |
| "epoch": 2.3954716981132074, | |
| "grad_norm": 0.2762683785182854, | |
| "learning_rate": 1.1135982092893118e-05, | |
| "loss": 0.4957, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 2.3969811320754717, | |
| "grad_norm": 0.2841965209758776, | |
| "learning_rate": 1.1108002238388361e-05, | |
| "loss": 0.5323, | |
| "step": 1589 | |
| }, | |
| { | |
| "epoch": 2.398490566037736, | |
| "grad_norm": 0.30355635904838757, | |
| "learning_rate": 1.1080022383883603e-05, | |
| "loss": 0.5362, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.2613635701106073, | |
| "learning_rate": 1.1052042529378848e-05, | |
| "loss": 0.5163, | |
| "step": 1591 | |
| }, | |
| { | |
| "epoch": 2.4015094339622642, | |
| "grad_norm": 0.616566657926194, | |
| "learning_rate": 1.1024062674874091e-05, | |
| "loss": 0.5233, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 2.403018867924528, | |
| "grad_norm": 0.2693893123464824, | |
| "learning_rate": 1.0996082820369335e-05, | |
| "loss": 0.5075, | |
| "step": 1593 | |
| }, | |
| { | |
| "epoch": 2.4045283018867925, | |
| "grad_norm": 0.27548371010084427, | |
| "learning_rate": 1.0968102965864578e-05, | |
| "loss": 0.5055, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 2.406037735849057, | |
| "grad_norm": 0.2781662462502358, | |
| "learning_rate": 1.0940123111359821e-05, | |
| "loss": 0.5539, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 2.4075471698113207, | |
| "grad_norm": 0.30912389736469736, | |
| "learning_rate": 1.0912143256855065e-05, | |
| "loss": 0.5981, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 2.409056603773585, | |
| "grad_norm": 0.28742057157673107, | |
| "learning_rate": 1.0884163402350308e-05, | |
| "loss": 0.4773, | |
| "step": 1597 | |
| }, | |
| { | |
| "epoch": 2.410566037735849, | |
| "grad_norm": 0.2544732632143813, | |
| "learning_rate": 1.0856183547845552e-05, | |
| "loss": 0.4747, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 2.412075471698113, | |
| "grad_norm": 0.2542075611364493, | |
| "learning_rate": 1.0828203693340795e-05, | |
| "loss": 0.4653, | |
| "step": 1599 | |
| }, | |
| { | |
| "epoch": 2.4135849056603775, | |
| "grad_norm": 0.28973359121279074, | |
| "learning_rate": 1.0800223838836038e-05, | |
| "loss": 0.5229, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.4150943396226414, | |
| "grad_norm": 0.36347700962188983, | |
| "learning_rate": 1.0772243984331282e-05, | |
| "loss": 0.5372, | |
| "step": 1601 | |
| }, | |
| { | |
| "epoch": 2.4166037735849057, | |
| "grad_norm": 0.27163267397417085, | |
| "learning_rate": 1.0744264129826525e-05, | |
| "loss": 0.5022, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 2.4181132075471696, | |
| "grad_norm": 3.40159737994843, | |
| "learning_rate": 1.0716284275321768e-05, | |
| "loss": 0.5791, | |
| "step": 1603 | |
| }, | |
| { | |
| "epoch": 2.419622641509434, | |
| "grad_norm": 0.3024980193028065, | |
| "learning_rate": 1.0688304420817012e-05, | |
| "loss": 0.5413, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 2.4211320754716983, | |
| "grad_norm": 0.3191908802604592, | |
| "learning_rate": 1.0660324566312257e-05, | |
| "loss": 0.5505, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 2.422641509433962, | |
| "grad_norm": 0.34247393903147477, | |
| "learning_rate": 1.0632344711807498e-05, | |
| "loss": 0.5156, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 2.4241509433962265, | |
| "grad_norm": 0.30555621465573174, | |
| "learning_rate": 1.0604364857302742e-05, | |
| "loss": 0.5164, | |
| "step": 1607 | |
| }, | |
| { | |
| "epoch": 2.4256603773584904, | |
| "grad_norm": 0.28883349420538895, | |
| "learning_rate": 1.0576385002797987e-05, | |
| "loss": 0.5196, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 2.4271698113207547, | |
| "grad_norm": 0.29370588202466086, | |
| "learning_rate": 1.0548405148293229e-05, | |
| "loss": 0.4992, | |
| "step": 1609 | |
| }, | |
| { | |
| "epoch": 2.428679245283019, | |
| "grad_norm": 0.2992724879265412, | |
| "learning_rate": 1.0520425293788474e-05, | |
| "loss": 0.5085, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.430188679245283, | |
| "grad_norm": 0.32945758350811966, | |
| "learning_rate": 1.0492445439283715e-05, | |
| "loss": 0.552, | |
| "step": 1611 | |
| }, | |
| { | |
| "epoch": 2.4316981132075473, | |
| "grad_norm": 0.2682269673076967, | |
| "learning_rate": 1.046446558477896e-05, | |
| "loss": 0.5156, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 2.433207547169811, | |
| "grad_norm": 0.28647715743947366, | |
| "learning_rate": 1.0436485730274204e-05, | |
| "loss": 0.4993, | |
| "step": 1613 | |
| }, | |
| { | |
| "epoch": 2.4347169811320755, | |
| "grad_norm": 0.2744939023949034, | |
| "learning_rate": 1.0408505875769445e-05, | |
| "loss": 0.5253, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 2.43622641509434, | |
| "grad_norm": 0.35250397665004307, | |
| "learning_rate": 1.038052602126469e-05, | |
| "loss": 0.5396, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 2.4377358490566037, | |
| "grad_norm": 0.3123726343205459, | |
| "learning_rate": 1.0352546166759932e-05, | |
| "loss": 0.5134, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 2.439245283018868, | |
| "grad_norm": 0.2764691102160436, | |
| "learning_rate": 1.0324566312255177e-05, | |
| "loss": 0.5091, | |
| "step": 1617 | |
| }, | |
| { | |
| "epoch": 2.440754716981132, | |
| "grad_norm": 0.2814627012575424, | |
| "learning_rate": 1.029658645775042e-05, | |
| "loss": 0.489, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 2.442264150943396, | |
| "grad_norm": 0.39565086571218255, | |
| "learning_rate": 1.0268606603245664e-05, | |
| "loss": 0.5338, | |
| "step": 1619 | |
| }, | |
| { | |
| "epoch": 2.4437735849056605, | |
| "grad_norm": 0.3089370742923853, | |
| "learning_rate": 1.0240626748740907e-05, | |
| "loss": 0.5511, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.4452830188679244, | |
| "grad_norm": 0.27889082409535804, | |
| "learning_rate": 1.021264689423615e-05, | |
| "loss": 0.4989, | |
| "step": 1621 | |
| }, | |
| { | |
| "epoch": 2.4467924528301888, | |
| "grad_norm": 0.2801027230431505, | |
| "learning_rate": 1.0184667039731394e-05, | |
| "loss": 0.5151, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 2.4483018867924526, | |
| "grad_norm": 0.2864728776570272, | |
| "learning_rate": 1.0156687185226637e-05, | |
| "loss": 0.5427, | |
| "step": 1623 | |
| }, | |
| { | |
| "epoch": 2.449811320754717, | |
| "grad_norm": 0.2870692848584596, | |
| "learning_rate": 1.012870733072188e-05, | |
| "loss": 0.5044, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 2.4513207547169813, | |
| "grad_norm": 0.27728518831547827, | |
| "learning_rate": 1.0100727476217124e-05, | |
| "loss": 0.5263, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 2.452830188679245, | |
| "grad_norm": 0.28108945164897814, | |
| "learning_rate": 1.0072747621712367e-05, | |
| "loss": 0.5278, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 2.4543396226415095, | |
| "grad_norm": 0.281028089568473, | |
| "learning_rate": 1.004476776720761e-05, | |
| "loss": 0.526, | |
| "step": 1627 | |
| }, | |
| { | |
| "epoch": 2.4558490566037734, | |
| "grad_norm": 0.29992491532026355, | |
| "learning_rate": 1.0016787912702854e-05, | |
| "loss": 0.5543, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 2.4573584905660377, | |
| "grad_norm": 0.2446925829790535, | |
| "learning_rate": 9.988808058198097e-06, | |
| "loss": 0.4827, | |
| "step": 1629 | |
| }, | |
| { | |
| "epoch": 2.458867924528302, | |
| "grad_norm": 0.29289490874253116, | |
| "learning_rate": 9.960828203693341e-06, | |
| "loss": 0.5348, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.460377358490566, | |
| "grad_norm": 0.3010609740305946, | |
| "learning_rate": 9.932848349188586e-06, | |
| "loss": 0.5043, | |
| "step": 1631 | |
| }, | |
| { | |
| "epoch": 2.4618867924528303, | |
| "grad_norm": 0.28789020180830055, | |
| "learning_rate": 9.904868494683828e-06, | |
| "loss": 0.5147, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 2.463396226415094, | |
| "grad_norm": 0.26705365000795894, | |
| "learning_rate": 9.876888640179071e-06, | |
| "loss": 0.5287, | |
| "step": 1633 | |
| }, | |
| { | |
| "epoch": 2.4649056603773585, | |
| "grad_norm": 0.2798599523967274, | |
| "learning_rate": 9.848908785674316e-06, | |
| "loss": 0.4933, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 2.466415094339623, | |
| "grad_norm": 0.2733384516616599, | |
| "learning_rate": 9.820928931169558e-06, | |
| "loss": 0.5029, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 2.4679245283018867, | |
| "grad_norm": 0.29643592834837407, | |
| "learning_rate": 9.792949076664803e-06, | |
| "loss": 0.5158, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 2.469433962264151, | |
| "grad_norm": 0.28526464289507686, | |
| "learning_rate": 9.764969222160044e-06, | |
| "loss": 0.55, | |
| "step": 1637 | |
| }, | |
| { | |
| "epoch": 2.470943396226415, | |
| "grad_norm": 0.26357930501597526, | |
| "learning_rate": 9.73698936765529e-06, | |
| "loss": 0.4749, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 2.4724528301886792, | |
| "grad_norm": 0.27127677302581304, | |
| "learning_rate": 9.709009513150533e-06, | |
| "loss": 0.4896, | |
| "step": 1639 | |
| }, | |
| { | |
| "epoch": 2.4739622641509436, | |
| "grad_norm": 0.3258038736676753, | |
| "learning_rate": 9.681029658645774e-06, | |
| "loss": 0.5453, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.4754716981132074, | |
| "grad_norm": 0.2721708903510538, | |
| "learning_rate": 9.65304980414102e-06, | |
| "loss": 0.5088, | |
| "step": 1641 | |
| }, | |
| { | |
| "epoch": 2.4769811320754718, | |
| "grad_norm": 0.25031552664361656, | |
| "learning_rate": 9.625069949636261e-06, | |
| "loss": 0.4432, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 2.4784905660377357, | |
| "grad_norm": 0.27170615085235295, | |
| "learning_rate": 9.597090095131506e-06, | |
| "loss": 0.5401, | |
| "step": 1643 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 0.25077059492786125, | |
| "learning_rate": 9.56911024062675e-06, | |
| "loss": 0.5061, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 2.4815094339622643, | |
| "grad_norm": 0.4381733597945453, | |
| "learning_rate": 9.541130386121993e-06, | |
| "loss": 0.5121, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 2.483018867924528, | |
| "grad_norm": 0.30828080858038703, | |
| "learning_rate": 9.513150531617236e-06, | |
| "loss": 0.4767, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 2.4845283018867925, | |
| "grad_norm": 0.2679178849366846, | |
| "learning_rate": 9.48517067711248e-06, | |
| "loss": 0.5115, | |
| "step": 1647 | |
| }, | |
| { | |
| "epoch": 2.486037735849057, | |
| "grad_norm": 0.25498782051352026, | |
| "learning_rate": 9.457190822607723e-06, | |
| "loss": 0.4925, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 2.4875471698113207, | |
| "grad_norm": 0.2712046865589304, | |
| "learning_rate": 9.429210968102966e-06, | |
| "loss": 0.5239, | |
| "step": 1649 | |
| }, | |
| { | |
| "epoch": 2.489056603773585, | |
| "grad_norm": 0.2841429645079444, | |
| "learning_rate": 9.40123111359821e-06, | |
| "loss": 0.5038, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.490566037735849, | |
| "grad_norm": 0.2922250090980482, | |
| "learning_rate": 9.373251259093453e-06, | |
| "loss": 0.5323, | |
| "step": 1651 | |
| }, | |
| { | |
| "epoch": 2.4920754716981133, | |
| "grad_norm": 0.3490130576354896, | |
| "learning_rate": 9.345271404588697e-06, | |
| "loss": 0.5219, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 2.493584905660377, | |
| "grad_norm": 0.2911807875623749, | |
| "learning_rate": 9.31729155008394e-06, | |
| "loss": 0.5356, | |
| "step": 1653 | |
| }, | |
| { | |
| "epoch": 2.4950943396226415, | |
| "grad_norm": 0.26818440305605495, | |
| "learning_rate": 9.289311695579183e-06, | |
| "loss": 0.4687, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 2.496603773584906, | |
| "grad_norm": 0.2629570306134703, | |
| "learning_rate": 9.261331841074427e-06, | |
| "loss": 0.5058, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 2.4981132075471697, | |
| "grad_norm": 0.27394341115092274, | |
| "learning_rate": 9.23335198656967e-06, | |
| "loss": 0.5189, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 2.499622641509434, | |
| "grad_norm": 0.31719664729022873, | |
| "learning_rate": 9.205372132064913e-06, | |
| "loss": 0.5479, | |
| "step": 1657 | |
| }, | |
| { | |
| "epoch": 2.5011320754716984, | |
| "grad_norm": 0.28948224769873077, | |
| "learning_rate": 9.177392277560157e-06, | |
| "loss": 0.5195, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 2.5026415094339622, | |
| "grad_norm": 0.27872732971809544, | |
| "learning_rate": 9.1494124230554e-06, | |
| "loss": 0.4954, | |
| "step": 1659 | |
| }, | |
| { | |
| "epoch": 2.5041509433962266, | |
| "grad_norm": 0.2972680704163209, | |
| "learning_rate": 9.121432568550645e-06, | |
| "loss": 0.5492, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.5056603773584905, | |
| "grad_norm": 0.258478206277154, | |
| "learning_rate": 9.093452714045887e-06, | |
| "loss": 0.4811, | |
| "step": 1661 | |
| }, | |
| { | |
| "epoch": 2.507169811320755, | |
| "grad_norm": 0.27135076957243554, | |
| "learning_rate": 9.065472859541132e-06, | |
| "loss": 0.5241, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 2.5086792452830187, | |
| "grad_norm": 0.25049127152785716, | |
| "learning_rate": 9.037493005036374e-06, | |
| "loss": 0.5322, | |
| "step": 1663 | |
| }, | |
| { | |
| "epoch": 2.510188679245283, | |
| "grad_norm": 0.2414933347105079, | |
| "learning_rate": 9.009513150531617e-06, | |
| "loss": 0.5065, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 2.5116981132075473, | |
| "grad_norm": 0.26905976255414465, | |
| "learning_rate": 8.981533296026862e-06, | |
| "loss": 0.5204, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 2.513207547169811, | |
| "grad_norm": 0.2574263160663239, | |
| "learning_rate": 8.953553441522104e-06, | |
| "loss": 0.4995, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 2.5147169811320755, | |
| "grad_norm": 0.2412927664002052, | |
| "learning_rate": 8.925573587017349e-06, | |
| "loss": 0.5189, | |
| "step": 1667 | |
| }, | |
| { | |
| "epoch": 2.51622641509434, | |
| "grad_norm": 0.2724697409186211, | |
| "learning_rate": 8.89759373251259e-06, | |
| "loss": 0.4707, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 2.5177358490566037, | |
| "grad_norm": 0.2649030391477551, | |
| "learning_rate": 8.869613878007835e-06, | |
| "loss": 0.5068, | |
| "step": 1669 | |
| }, | |
| { | |
| "epoch": 2.519245283018868, | |
| "grad_norm": 1.937782774908197, | |
| "learning_rate": 8.841634023503079e-06, | |
| "loss": 0.4939, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.520754716981132, | |
| "grad_norm": 0.26438563805690896, | |
| "learning_rate": 8.81365416899832e-06, | |
| "loss": 0.5031, | |
| "step": 1671 | |
| }, | |
| { | |
| "epoch": 2.5222641509433963, | |
| "grad_norm": 0.254419310430134, | |
| "learning_rate": 8.785674314493565e-06, | |
| "loss": 0.5183, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 2.52377358490566, | |
| "grad_norm": 0.27032143419025384, | |
| "learning_rate": 8.757694459988809e-06, | |
| "loss": 0.4891, | |
| "step": 1673 | |
| }, | |
| { | |
| "epoch": 2.5252830188679245, | |
| "grad_norm": 0.2714305046812353, | |
| "learning_rate": 8.729714605484052e-06, | |
| "loss": 0.5136, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 2.526792452830189, | |
| "grad_norm": 0.27145056603532564, | |
| "learning_rate": 8.701734750979296e-06, | |
| "loss": 0.5275, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 2.5283018867924527, | |
| "grad_norm": 0.2636503580959262, | |
| "learning_rate": 8.673754896474539e-06, | |
| "loss": 0.5661, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 2.529811320754717, | |
| "grad_norm": 0.2862829068236602, | |
| "learning_rate": 8.645775041969782e-06, | |
| "loss": 0.5069, | |
| "step": 1677 | |
| }, | |
| { | |
| "epoch": 2.5313207547169814, | |
| "grad_norm": 0.2629202015341607, | |
| "learning_rate": 8.617795187465026e-06, | |
| "loss": 0.5216, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 2.5328301886792453, | |
| "grad_norm": 0.2512663214374365, | |
| "learning_rate": 8.589815332960269e-06, | |
| "loss": 0.4779, | |
| "step": 1679 | |
| }, | |
| { | |
| "epoch": 2.5343396226415096, | |
| "grad_norm": 0.2582674957787444, | |
| "learning_rate": 8.561835478455512e-06, | |
| "loss": 0.5012, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.5358490566037735, | |
| "grad_norm": 0.26493998715379696, | |
| "learning_rate": 8.533855623950756e-06, | |
| "loss": 0.5263, | |
| "step": 1681 | |
| }, | |
| { | |
| "epoch": 2.537358490566038, | |
| "grad_norm": 0.2451699637847086, | |
| "learning_rate": 8.505875769445999e-06, | |
| "loss": 0.4737, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 2.5388679245283017, | |
| "grad_norm": 0.2646435180201724, | |
| "learning_rate": 8.477895914941242e-06, | |
| "loss": 0.5208, | |
| "step": 1683 | |
| }, | |
| { | |
| "epoch": 2.540377358490566, | |
| "grad_norm": 0.2582198344619996, | |
| "learning_rate": 8.449916060436486e-06, | |
| "loss": 0.4994, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 2.5418867924528303, | |
| "grad_norm": 0.24851559763189898, | |
| "learning_rate": 8.42193620593173e-06, | |
| "loss": 0.4999, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 2.543396226415094, | |
| "grad_norm": 0.2554578588043926, | |
| "learning_rate": 8.393956351426974e-06, | |
| "loss": 0.5173, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 2.5449056603773585, | |
| "grad_norm": 0.29270235106198766, | |
| "learning_rate": 8.365976496922216e-06, | |
| "loss": 0.5509, | |
| "step": 1687 | |
| }, | |
| { | |
| "epoch": 2.546415094339623, | |
| "grad_norm": 0.2521719040156708, | |
| "learning_rate": 8.337996642417461e-06, | |
| "loss": 0.4927, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 2.5479245283018868, | |
| "grad_norm": 0.24926253146252536, | |
| "learning_rate": 8.310016787912703e-06, | |
| "loss": 0.5119, | |
| "step": 1689 | |
| }, | |
| { | |
| "epoch": 2.549433962264151, | |
| "grad_norm": 0.24764507855189977, | |
| "learning_rate": 8.282036933407946e-06, | |
| "loss": 0.5169, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.550943396226415, | |
| "grad_norm": 0.2846935555038941, | |
| "learning_rate": 8.254057078903191e-06, | |
| "loss": 0.5303, | |
| "step": 1691 | |
| }, | |
| { | |
| "epoch": 2.5524528301886793, | |
| "grad_norm": 1.3298175058900719, | |
| "learning_rate": 8.226077224398433e-06, | |
| "loss": 0.4987, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 2.553962264150943, | |
| "grad_norm": 0.26585384992525096, | |
| "learning_rate": 8.198097369893678e-06, | |
| "loss": 0.533, | |
| "step": 1693 | |
| }, | |
| { | |
| "epoch": 2.5554716981132075, | |
| "grad_norm": 0.26892517763510493, | |
| "learning_rate": 8.17011751538892e-06, | |
| "loss": 0.5505, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 2.556981132075472, | |
| "grad_norm": 0.27708427633239685, | |
| "learning_rate": 8.142137660884165e-06, | |
| "loss": 0.4835, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 2.5584905660377357, | |
| "grad_norm": 0.25860083912017584, | |
| "learning_rate": 8.114157806379408e-06, | |
| "loss": 0.4909, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 0.28186533953501614, | |
| "learning_rate": 8.08617795187465e-06, | |
| "loss": 0.5547, | |
| "step": 1697 | |
| }, | |
| { | |
| "epoch": 2.5615094339622644, | |
| "grad_norm": 0.3569206208752698, | |
| "learning_rate": 8.058198097369895e-06, | |
| "loss": 0.5023, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 2.5630188679245283, | |
| "grad_norm": 0.26678040300011296, | |
| "learning_rate": 8.030218242865136e-06, | |
| "loss": 0.514, | |
| "step": 1699 | |
| }, | |
| { | |
| "epoch": 2.5645283018867926, | |
| "grad_norm": 0.27519779245794745, | |
| "learning_rate": 8.002238388360381e-06, | |
| "loss": 0.499, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.5660377358490565, | |
| "grad_norm": 0.28184342859209355, | |
| "learning_rate": 7.974258533855625e-06, | |
| "loss": 0.5079, | |
| "step": 1701 | |
| }, | |
| { | |
| "epoch": 2.567547169811321, | |
| "grad_norm": 0.2696907607563735, | |
| "learning_rate": 7.946278679350868e-06, | |
| "loss": 0.5349, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 2.5690566037735847, | |
| "grad_norm": 0.34249892968895884, | |
| "learning_rate": 7.918298824846111e-06, | |
| "loss": 0.4951, | |
| "step": 1703 | |
| }, | |
| { | |
| "epoch": 2.570566037735849, | |
| "grad_norm": 0.28104386328396547, | |
| "learning_rate": 7.890318970341355e-06, | |
| "loss": 0.4748, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 2.5720754716981133, | |
| "grad_norm": 0.27841856070995974, | |
| "learning_rate": 7.862339115836598e-06, | |
| "loss": 0.5164, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 2.5735849056603772, | |
| "grad_norm": 0.26103373360378673, | |
| "learning_rate": 7.834359261331841e-06, | |
| "loss": 0.5196, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 2.5750943396226416, | |
| "grad_norm": 0.28226689926781434, | |
| "learning_rate": 7.806379406827085e-06, | |
| "loss": 0.5471, | |
| "step": 1707 | |
| }, | |
| { | |
| "epoch": 2.576603773584906, | |
| "grad_norm": 0.260925718946685, | |
| "learning_rate": 7.778399552322328e-06, | |
| "loss": 0.5245, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 2.5781132075471698, | |
| "grad_norm": 0.2651170853166187, | |
| "learning_rate": 7.750419697817572e-06, | |
| "loss": 0.5191, | |
| "step": 1709 | |
| }, | |
| { | |
| "epoch": 2.579622641509434, | |
| "grad_norm": 0.2878754716574323, | |
| "learning_rate": 7.722439843312815e-06, | |
| "loss": 0.5276, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.581132075471698, | |
| "grad_norm": 0.38149999629802744, | |
| "learning_rate": 7.694459988808058e-06, | |
| "loss": 0.5063, | |
| "step": 1711 | |
| }, | |
| { | |
| "epoch": 2.5826415094339623, | |
| "grad_norm": 0.25769713900097513, | |
| "learning_rate": 7.666480134303302e-06, | |
| "loss": 0.4829, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 2.584150943396226, | |
| "grad_norm": 0.2537097287006656, | |
| "learning_rate": 7.638500279798545e-06, | |
| "loss": 0.5452, | |
| "step": 1713 | |
| }, | |
| { | |
| "epoch": 2.5856603773584905, | |
| "grad_norm": 0.26625886535127746, | |
| "learning_rate": 7.610520425293789e-06, | |
| "loss": 0.4987, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 2.587169811320755, | |
| "grad_norm": 0.27567634014604986, | |
| "learning_rate": 7.582540570789032e-06, | |
| "loss": 0.5292, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 2.5886792452830187, | |
| "grad_norm": 0.2745735348694907, | |
| "learning_rate": 7.554560716284276e-06, | |
| "loss": 0.5124, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 2.590188679245283, | |
| "grad_norm": 0.27869253594934607, | |
| "learning_rate": 7.526580861779519e-06, | |
| "loss": 0.5011, | |
| "step": 1717 | |
| }, | |
| { | |
| "epoch": 2.5916981132075474, | |
| "grad_norm": 0.26083708585981563, | |
| "learning_rate": 7.498601007274763e-06, | |
| "loss": 0.5063, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 2.5932075471698113, | |
| "grad_norm": 0.25898145180183996, | |
| "learning_rate": 7.470621152770006e-06, | |
| "loss": 0.5263, | |
| "step": 1719 | |
| }, | |
| { | |
| "epoch": 2.5947169811320756, | |
| "grad_norm": 0.25116990389413063, | |
| "learning_rate": 7.4426412982652486e-06, | |
| "loss": 0.5004, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.5962264150943395, | |
| "grad_norm": 0.26255445723201376, | |
| "learning_rate": 7.414661443760493e-06, | |
| "loss": 0.4588, | |
| "step": 1721 | |
| }, | |
| { | |
| "epoch": 2.597735849056604, | |
| "grad_norm": 0.29059302191645914, | |
| "learning_rate": 7.386681589255737e-06, | |
| "loss": 0.5715, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 2.5992452830188677, | |
| "grad_norm": 0.259169324744869, | |
| "learning_rate": 7.3587017347509795e-06, | |
| "loss": 0.5066, | |
| "step": 1723 | |
| }, | |
| { | |
| "epoch": 2.600754716981132, | |
| "grad_norm": 0.2550956884217296, | |
| "learning_rate": 7.330721880246223e-06, | |
| "loss": 0.4929, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 2.6022641509433964, | |
| "grad_norm": 0.26605119100893077, | |
| "learning_rate": 7.302742025741466e-06, | |
| "loss": 0.5148, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 2.6037735849056602, | |
| "grad_norm": 0.2839684129223697, | |
| "learning_rate": 7.27476217123671e-06, | |
| "loss": 0.5379, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 2.6052830188679246, | |
| "grad_norm": 0.24671746161838828, | |
| "learning_rate": 7.246782316731954e-06, | |
| "loss": 0.5344, | |
| "step": 1727 | |
| }, | |
| { | |
| "epoch": 2.606792452830189, | |
| "grad_norm": 0.26950728167251553, | |
| "learning_rate": 7.218802462227196e-06, | |
| "loss": 0.5269, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 2.608301886792453, | |
| "grad_norm": 0.30410413396696345, | |
| "learning_rate": 7.1908226077224405e-06, | |
| "loss": 0.5198, | |
| "step": 1729 | |
| }, | |
| { | |
| "epoch": 2.609811320754717, | |
| "grad_norm": 0.3012042476084446, | |
| "learning_rate": 7.162842753217684e-06, | |
| "loss": 0.5261, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.611320754716981, | |
| "grad_norm": 0.25613913625539647, | |
| "learning_rate": 7.134862898712926e-06, | |
| "loss": 0.5445, | |
| "step": 1731 | |
| }, | |
| { | |
| "epoch": 2.6128301886792453, | |
| "grad_norm": 0.24351065578422196, | |
| "learning_rate": 7.106883044208171e-06, | |
| "loss": 0.4737, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 2.614339622641509, | |
| "grad_norm": 0.27310531472703253, | |
| "learning_rate": 7.078903189703413e-06, | |
| "loss": 0.5484, | |
| "step": 1733 | |
| }, | |
| { | |
| "epoch": 2.6158490566037735, | |
| "grad_norm": 0.27115505717512334, | |
| "learning_rate": 7.050923335198657e-06, | |
| "loss": 0.5029, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 2.617358490566038, | |
| "grad_norm": 0.3029677136637512, | |
| "learning_rate": 7.0229434806939016e-06, | |
| "loss": 0.5418, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 2.6188679245283017, | |
| "grad_norm": 0.27908867772659823, | |
| "learning_rate": 6.994963626189144e-06, | |
| "loss": 0.5047, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 2.620377358490566, | |
| "grad_norm": 0.26058238891559354, | |
| "learning_rate": 6.9669837716843874e-06, | |
| "loss": 0.4792, | |
| "step": 1737 | |
| }, | |
| { | |
| "epoch": 2.6218867924528304, | |
| "grad_norm": 0.2675541705765002, | |
| "learning_rate": 6.93900391717963e-06, | |
| "loss": 0.5317, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 2.6233962264150943, | |
| "grad_norm": 0.27497640444418847, | |
| "learning_rate": 6.911024062674874e-06, | |
| "loss": 0.5155, | |
| "step": 1739 | |
| }, | |
| { | |
| "epoch": 2.6249056603773586, | |
| "grad_norm": 0.2744416701414958, | |
| "learning_rate": 6.883044208170118e-06, | |
| "loss": 0.5331, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.6264150943396225, | |
| "grad_norm": 0.2601570495959637, | |
| "learning_rate": 6.855064353665361e-06, | |
| "loss": 0.5181, | |
| "step": 1741 | |
| }, | |
| { | |
| "epoch": 2.627924528301887, | |
| "grad_norm": 0.5745643045304843, | |
| "learning_rate": 6.827084499160605e-06, | |
| "loss": 0.543, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 2.6294339622641507, | |
| "grad_norm": 0.2511923804891276, | |
| "learning_rate": 6.7991046446558485e-06, | |
| "loss": 0.5087, | |
| "step": 1743 | |
| }, | |
| { | |
| "epoch": 2.630943396226415, | |
| "grad_norm": 0.28546674221830076, | |
| "learning_rate": 6.771124790151091e-06, | |
| "loss": 0.5444, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 2.6324528301886794, | |
| "grad_norm": 0.28540740645360846, | |
| "learning_rate": 6.743144935646335e-06, | |
| "loss": 0.5114, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 2.6339622641509433, | |
| "grad_norm": 0.2930716219200601, | |
| "learning_rate": 6.715165081141578e-06, | |
| "loss": 0.4819, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 2.6354716981132076, | |
| "grad_norm": 0.25044896688250096, | |
| "learning_rate": 6.687185226636822e-06, | |
| "loss": 0.5132, | |
| "step": 1747 | |
| }, | |
| { | |
| "epoch": 2.636981132075472, | |
| "grad_norm": 0.27658711363810984, | |
| "learning_rate": 6.659205372132066e-06, | |
| "loss": 0.5184, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 2.638490566037736, | |
| "grad_norm": 1.2715522760039857, | |
| "learning_rate": 6.631225517627309e-06, | |
| "loss": 0.4769, | |
| "step": 1749 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 0.2606537545098945, | |
| "learning_rate": 6.603245663122552e-06, | |
| "loss": 0.5156, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.641509433962264, | |
| "grad_norm": 0.25476970965477846, | |
| "learning_rate": 6.5752658086177945e-06, | |
| "loss": 0.5238, | |
| "step": 1751 | |
| }, | |
| { | |
| "epoch": 2.6430188679245283, | |
| "grad_norm": 0.2760824398454106, | |
| "learning_rate": 6.547285954113039e-06, | |
| "loss": 0.4519, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 2.644528301886792, | |
| "grad_norm": 0.2552261697445966, | |
| "learning_rate": 6.519306099608283e-06, | |
| "loss": 0.5238, | |
| "step": 1753 | |
| }, | |
| { | |
| "epoch": 2.6460377358490565, | |
| "grad_norm": 0.2505994172404276, | |
| "learning_rate": 6.4913262451035254e-06, | |
| "loss": 0.4914, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 2.647547169811321, | |
| "grad_norm": 0.2622458535402264, | |
| "learning_rate": 6.46334639059877e-06, | |
| "loss": 0.5338, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 2.6490566037735848, | |
| "grad_norm": 0.2678028391066474, | |
| "learning_rate": 6.435366536094013e-06, | |
| "loss": 0.5065, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 2.650566037735849, | |
| "grad_norm": 0.2484890494798296, | |
| "learning_rate": 6.4073866815892555e-06, | |
| "loss": 0.4955, | |
| "step": 1757 | |
| }, | |
| { | |
| "epoch": 2.6520754716981134, | |
| "grad_norm": 0.2400998987991155, | |
| "learning_rate": 6.3794068270845e-06, | |
| "loss": 0.479, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 2.6535849056603773, | |
| "grad_norm": 0.32685735246268977, | |
| "learning_rate": 6.351426972579742e-06, | |
| "loss": 0.538, | |
| "step": 1759 | |
| }, | |
| { | |
| "epoch": 2.6550943396226416, | |
| "grad_norm": 0.2603240897335215, | |
| "learning_rate": 6.3234471180749865e-06, | |
| "loss": 0.498, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.6566037735849055, | |
| "grad_norm": 0.26860424402573896, | |
| "learning_rate": 6.29546726357023e-06, | |
| "loss": 0.5033, | |
| "step": 1761 | |
| }, | |
| { | |
| "epoch": 2.65811320754717, | |
| "grad_norm": 0.27530236656485146, | |
| "learning_rate": 6.267487409065473e-06, | |
| "loss": 0.4868, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 2.6596226415094337, | |
| "grad_norm": 0.24655990905403674, | |
| "learning_rate": 6.2395075545607166e-06, | |
| "loss": 0.4981, | |
| "step": 1763 | |
| }, | |
| { | |
| "epoch": 2.661132075471698, | |
| "grad_norm": 0.2620387036360828, | |
| "learning_rate": 6.21152770005596e-06, | |
| "loss": 0.5074, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 2.6626415094339624, | |
| "grad_norm": 0.2680227306633879, | |
| "learning_rate": 6.183547845551203e-06, | |
| "loss": 0.5265, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 2.6641509433962263, | |
| "grad_norm": 0.30502521082126244, | |
| "learning_rate": 6.155567991046447e-06, | |
| "loss": 0.5369, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 2.6656603773584906, | |
| "grad_norm": 0.2600029610076752, | |
| "learning_rate": 6.127588136541691e-06, | |
| "loss": 0.496, | |
| "step": 1767 | |
| }, | |
| { | |
| "epoch": 2.667169811320755, | |
| "grad_norm": 0.31160692938753265, | |
| "learning_rate": 6.099608282036933e-06, | |
| "loss": 0.5235, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 2.668679245283019, | |
| "grad_norm": 0.25597688257703527, | |
| "learning_rate": 6.071628427532177e-06, | |
| "loss": 0.5042, | |
| "step": 1769 | |
| }, | |
| { | |
| "epoch": 2.670188679245283, | |
| "grad_norm": 0.2713273198602419, | |
| "learning_rate": 6.04364857302742e-06, | |
| "loss": 0.5259, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.671698113207547, | |
| "grad_norm": 0.2665123339756921, | |
| "learning_rate": 6.015668718522664e-06, | |
| "loss": 0.4882, | |
| "step": 1771 | |
| }, | |
| { | |
| "epoch": 2.6732075471698113, | |
| "grad_norm": 0.25296669006459144, | |
| "learning_rate": 5.987688864017908e-06, | |
| "loss": 0.5413, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 2.6747169811320752, | |
| "grad_norm": 0.2429367748155575, | |
| "learning_rate": 5.959709009513151e-06, | |
| "loss": 0.518, | |
| "step": 1773 | |
| }, | |
| { | |
| "epoch": 2.6762264150943396, | |
| "grad_norm": 0.23850623638622218, | |
| "learning_rate": 5.931729155008394e-06, | |
| "loss": 0.4992, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 2.677735849056604, | |
| "grad_norm": 0.2749471413882996, | |
| "learning_rate": 5.903749300503638e-06, | |
| "loss": 0.5218, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 2.6792452830188678, | |
| "grad_norm": 0.27456504003042126, | |
| "learning_rate": 5.875769445998881e-06, | |
| "loss": 0.471, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 2.680754716981132, | |
| "grad_norm": 6.661889115901275, | |
| "learning_rate": 5.8477895914941245e-06, | |
| "loss": 0.8345, | |
| "step": 1777 | |
| }, | |
| { | |
| "epoch": 2.6822641509433964, | |
| "grad_norm": 0.3348543340219312, | |
| "learning_rate": 5.819809736989368e-06, | |
| "loss": 0.529, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 2.6837735849056603, | |
| "grad_norm": 0.4156712628420884, | |
| "learning_rate": 5.791829882484611e-06, | |
| "loss": 0.5019, | |
| "step": 1779 | |
| }, | |
| { | |
| "epoch": 2.6852830188679246, | |
| "grad_norm": 0.3163158572906154, | |
| "learning_rate": 5.763850027979855e-06, | |
| "loss": 0.5285, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.6867924528301885, | |
| "grad_norm": 0.2574033108522731, | |
| "learning_rate": 5.735870173475098e-06, | |
| "loss": 0.5036, | |
| "step": 1781 | |
| }, | |
| { | |
| "epoch": 2.688301886792453, | |
| "grad_norm": 0.2579216297103016, | |
| "learning_rate": 5.707890318970341e-06, | |
| "loss": 0.5149, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 2.6898113207547167, | |
| "grad_norm": 0.2566957267530941, | |
| "learning_rate": 5.679910464465585e-06, | |
| "loss": 0.5001, | |
| "step": 1783 | |
| }, | |
| { | |
| "epoch": 2.691320754716981, | |
| "grad_norm": 0.2440800355297943, | |
| "learning_rate": 5.651930609960829e-06, | |
| "loss": 0.4883, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 2.6928301886792454, | |
| "grad_norm": 0.2474228533524819, | |
| "learning_rate": 5.623950755456072e-06, | |
| "loss": 0.51, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 2.6943396226415093, | |
| "grad_norm": 0.2482868350432357, | |
| "learning_rate": 5.595970900951316e-06, | |
| "loss": 0.5291, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 2.6958490566037736, | |
| "grad_norm": 0.23259366850007013, | |
| "learning_rate": 5.567991046446559e-06, | |
| "loss": 0.4855, | |
| "step": 1787 | |
| }, | |
| { | |
| "epoch": 2.697358490566038, | |
| "grad_norm": 0.2460435686988639, | |
| "learning_rate": 5.5400111919418015e-06, | |
| "loss": 0.5084, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 2.698867924528302, | |
| "grad_norm": 0.26038359055504035, | |
| "learning_rate": 5.512031337437046e-06, | |
| "loss": 0.5132, | |
| "step": 1789 | |
| }, | |
| { | |
| "epoch": 2.700377358490566, | |
| "grad_norm": 0.28067308819480147, | |
| "learning_rate": 5.484051482932289e-06, | |
| "loss": 0.567, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.70188679245283, | |
| "grad_norm": 0.2471009110626984, | |
| "learning_rate": 5.456071628427532e-06, | |
| "loss": 0.516, | |
| "step": 1791 | |
| }, | |
| { | |
| "epoch": 2.7033962264150944, | |
| "grad_norm": 0.26721610477068264, | |
| "learning_rate": 5.428091773922776e-06, | |
| "loss": 0.5093, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 2.7049056603773582, | |
| "grad_norm": 0.2498621325235465, | |
| "learning_rate": 5.400111919418019e-06, | |
| "loss": 0.4712, | |
| "step": 1793 | |
| }, | |
| { | |
| "epoch": 2.7064150943396226, | |
| "grad_norm": 0.25672107157026186, | |
| "learning_rate": 5.3721320649132625e-06, | |
| "loss": 0.4877, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 2.707924528301887, | |
| "grad_norm": 0.22665414380543533, | |
| "learning_rate": 5.344152210408506e-06, | |
| "loss": 0.4315, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 2.709433962264151, | |
| "grad_norm": 0.24408552016267313, | |
| "learning_rate": 5.316172355903749e-06, | |
| "loss": 0.5067, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 2.710943396226415, | |
| "grad_norm": 0.24060193772424485, | |
| "learning_rate": 5.2881925013989934e-06, | |
| "loss": 0.4887, | |
| "step": 1797 | |
| }, | |
| { | |
| "epoch": 2.7124528301886794, | |
| "grad_norm": 0.2626077977831592, | |
| "learning_rate": 5.260212646894237e-06, | |
| "loss": 0.5321, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 2.7139622641509433, | |
| "grad_norm": 0.26635629306405656, | |
| "learning_rate": 5.23223279238948e-06, | |
| "loss": 0.4984, | |
| "step": 1799 | |
| }, | |
| { | |
| "epoch": 2.7154716981132077, | |
| "grad_norm": 0.27439116564294674, | |
| "learning_rate": 5.204252937884723e-06, | |
| "loss": 0.5383, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.7169811320754715, | |
| "grad_norm": 0.27336316693975615, | |
| "learning_rate": 5.176273083379966e-06, | |
| "loss": 0.4991, | |
| "step": 1801 | |
| }, | |
| { | |
| "epoch": 2.718490566037736, | |
| "grad_norm": 0.25078792605060657, | |
| "learning_rate": 5.14829322887521e-06, | |
| "loss": 0.4746, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 2.7199999999999998, | |
| "grad_norm": 0.2579469072246058, | |
| "learning_rate": 5.120313374370454e-06, | |
| "loss": 0.528, | |
| "step": 1803 | |
| }, | |
| { | |
| "epoch": 2.721509433962264, | |
| "grad_norm": 0.3410333484332067, | |
| "learning_rate": 5.092333519865697e-06, | |
| "loss": 0.4824, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 2.7230188679245284, | |
| "grad_norm": 0.34511657220725606, | |
| "learning_rate": 5.06435366536094e-06, | |
| "loss": 0.4832, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 2.7245283018867923, | |
| "grad_norm": 0.23917828132388028, | |
| "learning_rate": 5.036373810856184e-06, | |
| "loss": 0.5131, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 2.7260377358490566, | |
| "grad_norm": 0.250710939200575, | |
| "learning_rate": 5.008393956351427e-06, | |
| "loss": 0.5168, | |
| "step": 1807 | |
| }, | |
| { | |
| "epoch": 2.727547169811321, | |
| "grad_norm": 0.25569387237719443, | |
| "learning_rate": 4.9804141018466704e-06, | |
| "loss": 0.5251, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 2.729056603773585, | |
| "grad_norm": 0.2595212700539186, | |
| "learning_rate": 4.952434247341914e-06, | |
| "loss": 0.4766, | |
| "step": 1809 | |
| }, | |
| { | |
| "epoch": 2.730566037735849, | |
| "grad_norm": 0.3827330155254612, | |
| "learning_rate": 4.924454392837158e-06, | |
| "loss": 0.5103, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.732075471698113, | |
| "grad_norm": 0.24296190605611276, | |
| "learning_rate": 4.896474538332401e-06, | |
| "loss": 0.4906, | |
| "step": 1811 | |
| }, | |
| { | |
| "epoch": 2.7335849056603774, | |
| "grad_norm": 0.4549554603421421, | |
| "learning_rate": 4.868494683827645e-06, | |
| "loss": 0.4825, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 2.7350943396226413, | |
| "grad_norm": 0.27193474503030435, | |
| "learning_rate": 4.840514829322887e-06, | |
| "loss": 0.4838, | |
| "step": 1813 | |
| }, | |
| { | |
| "epoch": 2.7366037735849056, | |
| "grad_norm": 0.2563880809210197, | |
| "learning_rate": 4.812534974818131e-06, | |
| "loss": 0.5064, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 2.73811320754717, | |
| "grad_norm": 0.2548907465195013, | |
| "learning_rate": 4.784555120313375e-06, | |
| "loss": 0.5096, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 2.739622641509434, | |
| "grad_norm": 0.2535545744550968, | |
| "learning_rate": 4.756575265808618e-06, | |
| "loss": 0.5075, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 2.741132075471698, | |
| "grad_norm": 0.2538458515041986, | |
| "learning_rate": 4.7285954113038615e-06, | |
| "loss": 0.5233, | |
| "step": 1817 | |
| }, | |
| { | |
| "epoch": 2.7426415094339625, | |
| "grad_norm": 0.2449829091378601, | |
| "learning_rate": 4.700615556799105e-06, | |
| "loss": 0.4963, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 2.7441509433962263, | |
| "grad_norm": 0.2583453664244264, | |
| "learning_rate": 4.672635702294348e-06, | |
| "loss": 0.5075, | |
| "step": 1819 | |
| }, | |
| { | |
| "epoch": 2.7456603773584907, | |
| "grad_norm": 0.24305960423400333, | |
| "learning_rate": 4.644655847789592e-06, | |
| "loss": 0.5265, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.7471698113207546, | |
| "grad_norm": 0.24200712952451542, | |
| "learning_rate": 4.616675993284835e-06, | |
| "loss": 0.4775, | |
| "step": 1821 | |
| }, | |
| { | |
| "epoch": 2.748679245283019, | |
| "grad_norm": 0.28201193390392365, | |
| "learning_rate": 4.588696138780078e-06, | |
| "loss": 0.4837, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 2.7501886792452828, | |
| "grad_norm": 0.26394748624366776, | |
| "learning_rate": 4.5607162842753226e-06, | |
| "loss": 0.5334, | |
| "step": 1823 | |
| }, | |
| { | |
| "epoch": 2.751698113207547, | |
| "grad_norm": 0.24720195678002388, | |
| "learning_rate": 4.532736429770566e-06, | |
| "loss": 0.5021, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 2.7532075471698114, | |
| "grad_norm": 0.30947073261476093, | |
| "learning_rate": 4.5047565752658084e-06, | |
| "loss": 0.5546, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 2.7547169811320753, | |
| "grad_norm": 0.2988805165162394, | |
| "learning_rate": 4.476776720761052e-06, | |
| "loss": 0.5042, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 2.7562264150943396, | |
| "grad_norm": 0.2462987932828045, | |
| "learning_rate": 4.448796866256295e-06, | |
| "loss": 0.5357, | |
| "step": 1827 | |
| }, | |
| { | |
| "epoch": 2.757735849056604, | |
| "grad_norm": 0.2415826373536316, | |
| "learning_rate": 4.420817011751539e-06, | |
| "loss": 0.4834, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 2.759245283018868, | |
| "grad_norm": 0.2535333115257667, | |
| "learning_rate": 4.392837157246783e-06, | |
| "loss": 0.5412, | |
| "step": 1829 | |
| }, | |
| { | |
| "epoch": 2.760754716981132, | |
| "grad_norm": 0.24596768950408637, | |
| "learning_rate": 4.364857302742026e-06, | |
| "loss": 0.5119, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.7622641509433965, | |
| "grad_norm": 0.24845301619797933, | |
| "learning_rate": 4.3368774482372695e-06, | |
| "loss": 0.5117, | |
| "step": 1831 | |
| }, | |
| { | |
| "epoch": 2.7637735849056604, | |
| "grad_norm": 2.3475646101301897, | |
| "learning_rate": 4.308897593732513e-06, | |
| "loss": 0.509, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 2.7652830188679243, | |
| "grad_norm": 0.2544934651097907, | |
| "learning_rate": 4.280917739227756e-06, | |
| "loss": 0.5145, | |
| "step": 1833 | |
| }, | |
| { | |
| "epoch": 2.7667924528301886, | |
| "grad_norm": 0.2634896263966863, | |
| "learning_rate": 4.2529378847229995e-06, | |
| "loss": 0.5485, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 2.768301886792453, | |
| "grad_norm": 0.25651084274796315, | |
| "learning_rate": 4.224958030218243e-06, | |
| "loss": 0.5134, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 2.769811320754717, | |
| "grad_norm": 0.2646923542784634, | |
| "learning_rate": 4.196978175713487e-06, | |
| "loss": 0.5164, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 2.771320754716981, | |
| "grad_norm": 0.26044964960565437, | |
| "learning_rate": 4.1689983212087305e-06, | |
| "loss": 0.5161, | |
| "step": 1837 | |
| }, | |
| { | |
| "epoch": 2.7728301886792455, | |
| "grad_norm": 0.231525130140077, | |
| "learning_rate": 4.141018466703973e-06, | |
| "loss": 0.4982, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 2.7743396226415094, | |
| "grad_norm": 0.25942703847568144, | |
| "learning_rate": 4.113038612199216e-06, | |
| "loss": 0.5105, | |
| "step": 1839 | |
| }, | |
| { | |
| "epoch": 2.7758490566037737, | |
| "grad_norm": 0.26605983694964946, | |
| "learning_rate": 4.08505875769446e-06, | |
| "loss": 0.5111, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.777358490566038, | |
| "grad_norm": 0.24395856253509673, | |
| "learning_rate": 4.057078903189704e-06, | |
| "loss": 0.4747, | |
| "step": 1841 | |
| }, | |
| { | |
| "epoch": 2.778867924528302, | |
| "grad_norm": 0.26184813710048754, | |
| "learning_rate": 4.029099048684947e-06, | |
| "loss": 0.5157, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 2.7803773584905658, | |
| "grad_norm": 0.26268875875244085, | |
| "learning_rate": 4.001119194180191e-06, | |
| "loss": 0.5598, | |
| "step": 1843 | |
| }, | |
| { | |
| "epoch": 2.78188679245283, | |
| "grad_norm": 0.23009142240299044, | |
| "learning_rate": 3.973139339675434e-06, | |
| "loss": 0.4734, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 2.7833962264150944, | |
| "grad_norm": 0.2458756232178251, | |
| "learning_rate": 3.945159485170677e-06, | |
| "loss": 0.4777, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 2.7849056603773583, | |
| "grad_norm": 0.24794152608284992, | |
| "learning_rate": 3.917179630665921e-06, | |
| "loss": 0.4422, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 2.7864150943396226, | |
| "grad_norm": 0.24897274231650662, | |
| "learning_rate": 3.889199776161164e-06, | |
| "loss": 0.5042, | |
| "step": 1847 | |
| }, | |
| { | |
| "epoch": 2.787924528301887, | |
| "grad_norm": 0.26060926813065055, | |
| "learning_rate": 3.8612199216564075e-06, | |
| "loss": 0.4873, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 2.789433962264151, | |
| "grad_norm": 0.23848484427152364, | |
| "learning_rate": 3.833240067151651e-06, | |
| "loss": 0.5234, | |
| "step": 1849 | |
| }, | |
| { | |
| "epoch": 2.790943396226415, | |
| "grad_norm": 0.250353041090764, | |
| "learning_rate": 3.8052602126468946e-06, | |
| "loss": 0.5231, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.7924528301886795, | |
| "grad_norm": 0.24066679801616733, | |
| "learning_rate": 3.777280358142138e-06, | |
| "loss": 0.5212, | |
| "step": 1851 | |
| }, | |
| { | |
| "epoch": 2.7939622641509434, | |
| "grad_norm": 0.2600941674746975, | |
| "learning_rate": 3.7493005036373813e-06, | |
| "loss": 0.5084, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 2.7954716981132073, | |
| "grad_norm": 0.2510555438696239, | |
| "learning_rate": 3.7213206491326243e-06, | |
| "loss": 0.5511, | |
| "step": 1853 | |
| }, | |
| { | |
| "epoch": 2.7969811320754716, | |
| "grad_norm": 0.24160412240772206, | |
| "learning_rate": 3.6933407946278685e-06, | |
| "loss": 0.5095, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 2.798490566037736, | |
| "grad_norm": 0.2748568322627208, | |
| "learning_rate": 3.6653609401231114e-06, | |
| "loss": 0.5496, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.2584358586285045, | |
| "learning_rate": 3.637381085618355e-06, | |
| "loss": 0.5463, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 2.801509433962264, | |
| "grad_norm": 0.2498517036300968, | |
| "learning_rate": 3.609401231113598e-06, | |
| "loss": 0.5415, | |
| "step": 1857 | |
| }, | |
| { | |
| "epoch": 2.8030188679245285, | |
| "grad_norm": 0.24524690085464163, | |
| "learning_rate": 3.581421376608842e-06, | |
| "loss": 0.5375, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 2.8045283018867924, | |
| "grad_norm": 0.2769513128398129, | |
| "learning_rate": 3.5534415221040853e-06, | |
| "loss": 0.552, | |
| "step": 1859 | |
| }, | |
| { | |
| "epoch": 2.8060377358490567, | |
| "grad_norm": 0.2506846446168906, | |
| "learning_rate": 3.5254616675993287e-06, | |
| "loss": 0.5459, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.807547169811321, | |
| "grad_norm": 0.2554286512869799, | |
| "learning_rate": 3.497481813094572e-06, | |
| "loss": 0.4944, | |
| "step": 1861 | |
| }, | |
| { | |
| "epoch": 2.809056603773585, | |
| "grad_norm": 0.23601391002584438, | |
| "learning_rate": 3.469501958589815e-06, | |
| "loss": 0.4922, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 2.810566037735849, | |
| "grad_norm": 0.26302780246456897, | |
| "learning_rate": 3.441522104085059e-06, | |
| "loss": 0.5044, | |
| "step": 1863 | |
| }, | |
| { | |
| "epoch": 2.812075471698113, | |
| "grad_norm": 0.2577873934898228, | |
| "learning_rate": 3.4135422495803025e-06, | |
| "loss": 0.5334, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 2.8135849056603774, | |
| "grad_norm": 0.23924590337032847, | |
| "learning_rate": 3.3855623950755455e-06, | |
| "loss": 0.5229, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 2.8150943396226413, | |
| "grad_norm": 0.24147365730794057, | |
| "learning_rate": 3.357582540570789e-06, | |
| "loss": 0.5122, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 2.8166037735849057, | |
| "grad_norm": 0.2327138561482277, | |
| "learning_rate": 3.329602686066033e-06, | |
| "loss": 0.4637, | |
| "step": 1867 | |
| }, | |
| { | |
| "epoch": 2.81811320754717, | |
| "grad_norm": 0.2750595632862016, | |
| "learning_rate": 3.301622831561276e-06, | |
| "loss": 0.4651, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 2.819622641509434, | |
| "grad_norm": 0.25380188643044477, | |
| "learning_rate": 3.2736429770565194e-06, | |
| "loss": 0.5683, | |
| "step": 1869 | |
| }, | |
| { | |
| "epoch": 2.821132075471698, | |
| "grad_norm": 0.24310252434490498, | |
| "learning_rate": 3.2456631225517627e-06, | |
| "loss": 0.5079, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.8226415094339625, | |
| "grad_norm": 0.22968025157830738, | |
| "learning_rate": 3.2176832680470065e-06, | |
| "loss": 0.4756, | |
| "step": 1871 | |
| }, | |
| { | |
| "epoch": 2.8241509433962264, | |
| "grad_norm": 0.23406333934374154, | |
| "learning_rate": 3.18970341354225e-06, | |
| "loss": 0.508, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 2.8256603773584903, | |
| "grad_norm": 3.331648704661487, | |
| "learning_rate": 3.1617235590374932e-06, | |
| "loss": 0.5314, | |
| "step": 1873 | |
| }, | |
| { | |
| "epoch": 2.8271698113207546, | |
| "grad_norm": 0.2695647874070789, | |
| "learning_rate": 3.1337437045327366e-06, | |
| "loss": 0.5471, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 2.828679245283019, | |
| "grad_norm": 0.25565203431365335, | |
| "learning_rate": 3.10576385002798e-06, | |
| "loss": 0.4956, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 2.830188679245283, | |
| "grad_norm": 0.24812316022229003, | |
| "learning_rate": 3.0777839955232233e-06, | |
| "loss": 0.5063, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 2.831698113207547, | |
| "grad_norm": 0.2512898425536453, | |
| "learning_rate": 3.0498041410184667e-06, | |
| "loss": 0.5132, | |
| "step": 1877 | |
| }, | |
| { | |
| "epoch": 2.8332075471698115, | |
| "grad_norm": 0.25905997230865613, | |
| "learning_rate": 3.02182428651371e-06, | |
| "loss": 0.536, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 2.8347169811320754, | |
| "grad_norm": 0.2527084751717376, | |
| "learning_rate": 2.993844432008954e-06, | |
| "loss": 0.5058, | |
| "step": 1879 | |
| }, | |
| { | |
| "epoch": 2.8362264150943397, | |
| "grad_norm": 0.25014337600940817, | |
| "learning_rate": 2.965864577504197e-06, | |
| "loss": 0.5141, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.837735849056604, | |
| "grad_norm": 0.26771202447798464, | |
| "learning_rate": 2.9378847229994406e-06, | |
| "loss": 0.4997, | |
| "step": 1881 | |
| }, | |
| { | |
| "epoch": 2.839245283018868, | |
| "grad_norm": 0.3542671459269332, | |
| "learning_rate": 2.909904868494684e-06, | |
| "loss": 0.5118, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 2.840754716981132, | |
| "grad_norm": 0.29850083758472223, | |
| "learning_rate": 2.8819250139899277e-06, | |
| "loss": 0.5432, | |
| "step": 1883 | |
| }, | |
| { | |
| "epoch": 2.842264150943396, | |
| "grad_norm": 0.23709430578888918, | |
| "learning_rate": 2.8539451594851706e-06, | |
| "loss": 0.5221, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 2.8437735849056605, | |
| "grad_norm": 0.26493114426048814, | |
| "learning_rate": 2.8259653049804144e-06, | |
| "loss": 0.5103, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 2.8452830188679243, | |
| "grad_norm": 0.23960026530705572, | |
| "learning_rate": 2.797985450475658e-06, | |
| "loss": 0.4965, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 2.8467924528301887, | |
| "grad_norm": 0.251820373185035, | |
| "learning_rate": 2.7700055959709007e-06, | |
| "loss": 0.512, | |
| "step": 1887 | |
| }, | |
| { | |
| "epoch": 2.848301886792453, | |
| "grad_norm": 0.2716135014490187, | |
| "learning_rate": 2.7420257414661445e-06, | |
| "loss": 0.5373, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 2.849811320754717, | |
| "grad_norm": 0.3050812952561557, | |
| "learning_rate": 2.714045886961388e-06, | |
| "loss": 0.4955, | |
| "step": 1889 | |
| }, | |
| { | |
| "epoch": 2.851320754716981, | |
| "grad_norm": 0.23801738006267384, | |
| "learning_rate": 2.6860660324566312e-06, | |
| "loss": 0.4919, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.8528301886792455, | |
| "grad_norm": 0.23926038110207012, | |
| "learning_rate": 2.6580861779518746e-06, | |
| "loss": 0.486, | |
| "step": 1891 | |
| }, | |
| { | |
| "epoch": 2.8543396226415094, | |
| "grad_norm": 0.24625086888055978, | |
| "learning_rate": 2.6301063234471184e-06, | |
| "loss": 0.563, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 2.8558490566037738, | |
| "grad_norm": 0.25559463779086583, | |
| "learning_rate": 2.6021264689423613e-06, | |
| "loss": 0.4613, | |
| "step": 1893 | |
| }, | |
| { | |
| "epoch": 2.8573584905660376, | |
| "grad_norm": 0.2437794999089875, | |
| "learning_rate": 2.574146614437605e-06, | |
| "loss": 0.5394, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 2.858867924528302, | |
| "grad_norm": 0.24054866492710766, | |
| "learning_rate": 2.5461667599328485e-06, | |
| "loss": 0.5187, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 2.860377358490566, | |
| "grad_norm": 0.24376318441166045, | |
| "learning_rate": 2.518186905428092e-06, | |
| "loss": 0.4887, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 2.86188679245283, | |
| "grad_norm": 0.2410895191336604, | |
| "learning_rate": 2.4902070509233352e-06, | |
| "loss": 0.4956, | |
| "step": 1897 | |
| }, | |
| { | |
| "epoch": 2.8633962264150945, | |
| "grad_norm": 0.2389979905294272, | |
| "learning_rate": 2.462227196418579e-06, | |
| "loss": 0.5055, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 2.8649056603773584, | |
| "grad_norm": 0.24099313691698937, | |
| "learning_rate": 2.4342473419138224e-06, | |
| "loss": 0.4998, | |
| "step": 1899 | |
| }, | |
| { | |
| "epoch": 2.8664150943396227, | |
| "grad_norm": 0.24527579020611284, | |
| "learning_rate": 2.4062674874090653e-06, | |
| "loss": 0.5264, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.867924528301887, | |
| "grad_norm": 0.23933465605234516, | |
| "learning_rate": 2.378287632904309e-06, | |
| "loss": 0.4959, | |
| "step": 1901 | |
| }, | |
| { | |
| "epoch": 2.869433962264151, | |
| "grad_norm": 0.24622168013990314, | |
| "learning_rate": 2.3503077783995524e-06, | |
| "loss": 0.5173, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 2.8709433962264153, | |
| "grad_norm": 0.23919977406233273, | |
| "learning_rate": 2.322327923894796e-06, | |
| "loss": 0.4966, | |
| "step": 1903 | |
| }, | |
| { | |
| "epoch": 2.872452830188679, | |
| "grad_norm": 0.261296807681531, | |
| "learning_rate": 2.294348069390039e-06, | |
| "loss": 0.5598, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 2.8739622641509435, | |
| "grad_norm": 0.2799512370951401, | |
| "learning_rate": 2.266368214885283e-06, | |
| "loss": 0.4819, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 2.8754716981132074, | |
| "grad_norm": 0.23214062234311064, | |
| "learning_rate": 2.238388360380526e-06, | |
| "loss": 0.5261, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 2.8769811320754717, | |
| "grad_norm": 0.24329861142019843, | |
| "learning_rate": 2.2104085058757697e-06, | |
| "loss": 0.5273, | |
| "step": 1907 | |
| }, | |
| { | |
| "epoch": 2.878490566037736, | |
| "grad_norm": 0.24170422215332774, | |
| "learning_rate": 2.182428651371013e-06, | |
| "loss": 0.4899, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 0.21557021451090852, | |
| "learning_rate": 2.1544487968662564e-06, | |
| "loss": 0.4499, | |
| "step": 1909 | |
| }, | |
| { | |
| "epoch": 2.881509433962264, | |
| "grad_norm": 0.24761336600665296, | |
| "learning_rate": 2.1264689423614998e-06, | |
| "loss": 0.5349, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.8830188679245285, | |
| "grad_norm": 0.28587028512046125, | |
| "learning_rate": 2.0984890878567436e-06, | |
| "loss": 0.4769, | |
| "step": 1911 | |
| }, | |
| { | |
| "epoch": 2.8845283018867924, | |
| "grad_norm": 0.23215345407826837, | |
| "learning_rate": 2.0705092333519865e-06, | |
| "loss": 0.5057, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 2.8860377358490568, | |
| "grad_norm": 0.23557540955317524, | |
| "learning_rate": 2.04252937884723e-06, | |
| "loss": 0.4983, | |
| "step": 1913 | |
| }, | |
| { | |
| "epoch": 2.8875471698113206, | |
| "grad_norm": 0.24826597774453552, | |
| "learning_rate": 2.0145495243424736e-06, | |
| "loss": 0.506, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 2.889056603773585, | |
| "grad_norm": 0.27097286067099186, | |
| "learning_rate": 1.986569669837717e-06, | |
| "loss": 0.4824, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 2.890566037735849, | |
| "grad_norm": 0.2323466023436389, | |
| "learning_rate": 1.9585898153329604e-06, | |
| "loss": 0.49, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 2.892075471698113, | |
| "grad_norm": 0.24530440733332493, | |
| "learning_rate": 1.9306099608282037e-06, | |
| "loss": 0.5128, | |
| "step": 1917 | |
| }, | |
| { | |
| "epoch": 2.8935849056603775, | |
| "grad_norm": 0.2217381642822851, | |
| "learning_rate": 1.9026301063234473e-06, | |
| "loss": 0.4941, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 2.8950943396226414, | |
| "grad_norm": 0.3033421523357643, | |
| "learning_rate": 1.8746502518186907e-06, | |
| "loss": 0.5488, | |
| "step": 1919 | |
| }, | |
| { | |
| "epoch": 2.8966037735849057, | |
| "grad_norm": 0.22952196943803127, | |
| "learning_rate": 1.8466703973139342e-06, | |
| "loss": 0.4873, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.89811320754717, | |
| "grad_norm": 0.24646380697980175, | |
| "learning_rate": 1.8186905428091774e-06, | |
| "loss": 0.5466, | |
| "step": 1921 | |
| }, | |
| { | |
| "epoch": 2.899622641509434, | |
| "grad_norm": 0.2931648546342374, | |
| "learning_rate": 1.790710688304421e-06, | |
| "loss": 0.494, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 2.9011320754716983, | |
| "grad_norm": 0.6019713876730823, | |
| "learning_rate": 1.7627308337996643e-06, | |
| "loss": 0.5361, | |
| "step": 1923 | |
| }, | |
| { | |
| "epoch": 2.902641509433962, | |
| "grad_norm": 0.2643446098419106, | |
| "learning_rate": 1.7347509792949075e-06, | |
| "loss": 0.51, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 2.9041509433962265, | |
| "grad_norm": 0.24476304649977285, | |
| "learning_rate": 1.7067711247901513e-06, | |
| "loss": 0.5391, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 2.9056603773584904, | |
| "grad_norm": 0.24501363596775003, | |
| "learning_rate": 1.6787912702853944e-06, | |
| "loss": 0.5126, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 2.9071698113207547, | |
| "grad_norm": 0.23738294037707477, | |
| "learning_rate": 1.650811415780638e-06, | |
| "loss": 0.4981, | |
| "step": 1927 | |
| }, | |
| { | |
| "epoch": 2.908679245283019, | |
| "grad_norm": 0.2353858028048875, | |
| "learning_rate": 1.6228315612758814e-06, | |
| "loss": 0.5393, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 2.910188679245283, | |
| "grad_norm": 0.24557576052226965, | |
| "learning_rate": 1.594851706771125e-06, | |
| "loss": 0.5146, | |
| "step": 1929 | |
| }, | |
| { | |
| "epoch": 2.9116981132075472, | |
| "grad_norm": 0.23598740859198736, | |
| "learning_rate": 1.5668718522663683e-06, | |
| "loss": 0.5069, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.9132075471698116, | |
| "grad_norm": 0.23965223087346654, | |
| "learning_rate": 1.5388919977616117e-06, | |
| "loss": 0.5293, | |
| "step": 1931 | |
| }, | |
| { | |
| "epoch": 2.9147169811320754, | |
| "grad_norm": 0.23578049631296016, | |
| "learning_rate": 1.510912143256855e-06, | |
| "loss": 0.5237, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 2.9162264150943398, | |
| "grad_norm": 0.2512094274756097, | |
| "learning_rate": 1.4829322887520986e-06, | |
| "loss": 0.5305, | |
| "step": 1933 | |
| }, | |
| { | |
| "epoch": 2.9177358490566037, | |
| "grad_norm": 0.2419640704406858, | |
| "learning_rate": 1.454952434247342e-06, | |
| "loss": 0.5019, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 2.919245283018868, | |
| "grad_norm": 0.2341231605907135, | |
| "learning_rate": 1.4269725797425853e-06, | |
| "loss": 0.493, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 2.920754716981132, | |
| "grad_norm": 0.22368464083267006, | |
| "learning_rate": 1.398992725237829e-06, | |
| "loss": 0.479, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 2.922264150943396, | |
| "grad_norm": 0.23934904699725973, | |
| "learning_rate": 1.3710128707330723e-06, | |
| "loss": 0.5182, | |
| "step": 1937 | |
| }, | |
| { | |
| "epoch": 2.9237735849056605, | |
| "grad_norm": 0.2423304768056013, | |
| "learning_rate": 1.3430330162283156e-06, | |
| "loss": 0.5287, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 2.9252830188679244, | |
| "grad_norm": 0.24220829389874426, | |
| "learning_rate": 1.3150531617235592e-06, | |
| "loss": 0.5131, | |
| "step": 1939 | |
| }, | |
| { | |
| "epoch": 2.9267924528301887, | |
| "grad_norm": 0.2408649756673509, | |
| "learning_rate": 1.2870733072188026e-06, | |
| "loss": 0.5256, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.928301886792453, | |
| "grad_norm": 0.26970041615884593, | |
| "learning_rate": 1.259093452714046e-06, | |
| "loss": 0.5247, | |
| "step": 1941 | |
| }, | |
| { | |
| "epoch": 2.929811320754717, | |
| "grad_norm": 0.2462934836700248, | |
| "learning_rate": 1.2311135982092895e-06, | |
| "loss": 0.5274, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 2.9313207547169813, | |
| "grad_norm": 0.24346784211748765, | |
| "learning_rate": 1.2031337437045327e-06, | |
| "loss": 0.5353, | |
| "step": 1943 | |
| }, | |
| { | |
| "epoch": 2.932830188679245, | |
| "grad_norm": 0.2308252685210892, | |
| "learning_rate": 1.1751538891997762e-06, | |
| "loss": 0.4403, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 2.9343396226415095, | |
| "grad_norm": 0.24611093220055966, | |
| "learning_rate": 1.1471740346950196e-06, | |
| "loss": 0.4911, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 2.9358490566037734, | |
| "grad_norm": 0.23677755966512776, | |
| "learning_rate": 1.119194180190263e-06, | |
| "loss": 0.4889, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 2.9373584905660377, | |
| "grad_norm": 0.2390396184181226, | |
| "learning_rate": 1.0912143256855065e-06, | |
| "loss": 0.4938, | |
| "step": 1947 | |
| }, | |
| { | |
| "epoch": 2.938867924528302, | |
| "grad_norm": 0.2298114470810896, | |
| "learning_rate": 1.0632344711807499e-06, | |
| "loss": 0.5164, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 2.940377358490566, | |
| "grad_norm": 0.23867767626825967, | |
| "learning_rate": 1.0352546166759932e-06, | |
| "loss": 0.5339, | |
| "step": 1949 | |
| }, | |
| { | |
| "epoch": 2.9418867924528302, | |
| "grad_norm": 0.2273239656737244, | |
| "learning_rate": 1.0072747621712368e-06, | |
| "loss": 0.4873, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.9433962264150946, | |
| "grad_norm": 0.2510529360776652, | |
| "learning_rate": 9.792949076664802e-07, | |
| "loss": 0.541, | |
| "step": 1951 | |
| }, | |
| { | |
| "epoch": 2.9449056603773585, | |
| "grad_norm": 0.2420207367421682, | |
| "learning_rate": 9.513150531617237e-07, | |
| "loss": 0.5357, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 2.946415094339623, | |
| "grad_norm": 0.23866840294873037, | |
| "learning_rate": 9.233351986569671e-07, | |
| "loss": 0.516, | |
| "step": 1953 | |
| }, | |
| { | |
| "epoch": 2.9479245283018867, | |
| "grad_norm": 0.23833048632270618, | |
| "learning_rate": 8.953553441522105e-07, | |
| "loss": 0.5001, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 2.949433962264151, | |
| "grad_norm": 0.23240358196803712, | |
| "learning_rate": 8.673754896474537e-07, | |
| "loss": 0.5263, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 2.950943396226415, | |
| "grad_norm": 0.27536994744227083, | |
| "learning_rate": 8.393956351426972e-07, | |
| "loss": 0.4919, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 2.952452830188679, | |
| "grad_norm": 0.23738342408866947, | |
| "learning_rate": 8.114157806379407e-07, | |
| "loss": 0.4924, | |
| "step": 1957 | |
| }, | |
| { | |
| "epoch": 2.9539622641509435, | |
| "grad_norm": 0.2227032657641613, | |
| "learning_rate": 7.834359261331841e-07, | |
| "loss": 0.4912, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 2.9554716981132074, | |
| "grad_norm": 0.23463528292989802, | |
| "learning_rate": 7.554560716284275e-07, | |
| "loss": 0.4838, | |
| "step": 1959 | |
| }, | |
| { | |
| "epoch": 2.9569811320754718, | |
| "grad_norm": 0.2351782144870339, | |
| "learning_rate": 7.27476217123671e-07, | |
| "loss": 0.5385, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.958490566037736, | |
| "grad_norm": 0.30869561893156994, | |
| "learning_rate": 6.994963626189144e-07, | |
| "loss": 0.5375, | |
| "step": 1961 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 0.2507617576529779, | |
| "learning_rate": 6.715165081141578e-07, | |
| "loss": 0.5086, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 2.9615094339622643, | |
| "grad_norm": 0.26263822536508746, | |
| "learning_rate": 6.435366536094013e-07, | |
| "loss": 0.5411, | |
| "step": 1963 | |
| }, | |
| { | |
| "epoch": 2.963018867924528, | |
| "grad_norm": 0.22776692640780824, | |
| "learning_rate": 6.155567991046447e-07, | |
| "loss": 0.4882, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 2.9645283018867925, | |
| "grad_norm": 0.23187613900036108, | |
| "learning_rate": 5.875769445998881e-07, | |
| "loss": 0.5031, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 2.9660377358490564, | |
| "grad_norm": 0.23980976100159304, | |
| "learning_rate": 5.595970900951315e-07, | |
| "loss": 0.5111, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 2.9675471698113207, | |
| "grad_norm": 0.23454625576676985, | |
| "learning_rate": 5.316172355903749e-07, | |
| "loss": 0.4886, | |
| "step": 1967 | |
| }, | |
| { | |
| "epoch": 2.969056603773585, | |
| "grad_norm": 0.22466469775472325, | |
| "learning_rate": 5.036373810856184e-07, | |
| "loss": 0.4927, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 2.970566037735849, | |
| "grad_norm": 0.2424670877215881, | |
| "learning_rate": 4.7565752658086183e-07, | |
| "loss": 0.5188, | |
| "step": 1969 | |
| }, | |
| { | |
| "epoch": 2.9720754716981133, | |
| "grad_norm": 0.2634277432088806, | |
| "learning_rate": 4.4767767207610524e-07, | |
| "loss": 0.4995, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.9735849056603776, | |
| "grad_norm": 0.24871804164038816, | |
| "learning_rate": 4.196978175713486e-07, | |
| "loss": 0.4975, | |
| "step": 1971 | |
| }, | |
| { | |
| "epoch": 2.9750943396226415, | |
| "grad_norm": 0.22918011887355255, | |
| "learning_rate": 3.917179630665921e-07, | |
| "loss": 0.5146, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 2.976603773584906, | |
| "grad_norm": 0.24494562015577118, | |
| "learning_rate": 3.637381085618355e-07, | |
| "loss": 0.5191, | |
| "step": 1973 | |
| }, | |
| { | |
| "epoch": 2.9781132075471697, | |
| "grad_norm": 0.24554508049209534, | |
| "learning_rate": 3.357582540570789e-07, | |
| "loss": 0.5134, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 2.979622641509434, | |
| "grad_norm": 0.24686957526571518, | |
| "learning_rate": 3.077783995523224e-07, | |
| "loss": 0.4754, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 2.981132075471698, | |
| "grad_norm": 0.2309101984662458, | |
| "learning_rate": 2.7979854504756574e-07, | |
| "loss": 0.4805, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 2.9826415094339622, | |
| "grad_norm": 0.23759627074403436, | |
| "learning_rate": 2.518186905428092e-07, | |
| "loss": 0.5359, | |
| "step": 1977 | |
| }, | |
| { | |
| "epoch": 2.9841509433962266, | |
| "grad_norm": 0.25434227095914247, | |
| "learning_rate": 2.2383883603805262e-07, | |
| "loss": 0.5258, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 2.9856603773584904, | |
| "grad_norm": 0.24774479007599787, | |
| "learning_rate": 1.9585898153329604e-07, | |
| "loss": 0.5029, | |
| "step": 1979 | |
| }, | |
| { | |
| "epoch": 2.9871698113207548, | |
| "grad_norm": 0.2562930857219345, | |
| "learning_rate": 1.6787912702853945e-07, | |
| "loss": 0.5213, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.988679245283019, | |
| "grad_norm": 0.24488783175852774, | |
| "learning_rate": 1.3989927252378287e-07, | |
| "loss": 0.5062, | |
| "step": 1981 | |
| }, | |
| { | |
| "epoch": 2.990188679245283, | |
| "grad_norm": 0.24061088726491453, | |
| "learning_rate": 1.1191941801902631e-07, | |
| "loss": 0.5221, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 2.9916981132075473, | |
| "grad_norm": 0.23745813306754143, | |
| "learning_rate": 8.393956351426973e-08, | |
| "loss": 0.4621, | |
| "step": 1983 | |
| }, | |
| { | |
| "epoch": 2.993207547169811, | |
| "grad_norm": 0.24766707785226677, | |
| "learning_rate": 5.5959709009513155e-08, | |
| "loss": 0.5439, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 2.9947169811320755, | |
| "grad_norm": 0.22755699352891728, | |
| "learning_rate": 2.7979854504756578e-08, | |
| "loss": 0.4968, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 2.9962264150943394, | |
| "grad_norm": 0.2639171679918981, | |
| "learning_rate": 0.0, | |
| "loss": 0.5176, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 2.9962264150943394, | |
| "step": 1986, | |
| "total_flos": 1.6837767016679997e+18, | |
| "train_loss": 0.7350575219978619, | |
| "train_runtime": 115424.0134, | |
| "train_samples_per_second": 0.275, | |
| "train_steps_per_second": 0.017 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1986, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.6837767016679997e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |