Instructions to use wonwonn/pcagent_human_only_adapter with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use wonwonn/pcagent_human_only_adapter with PEFT:
from peft import PeftModel from transformers import AutoModelForCausalLM base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct") model = PeftModel.from_pretrained(base_model, "wonwonn/pcagent_human_only_adapter") - Transformers
How to use wonwonn/pcagent_human_only_adapter with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("text-generation", model="wonwonn/pcagent_human_only_adapter") messages = [ {"role": "user", "content": "Who are you?"}, ] pipe(messages)# Load model directly from transformers import AutoModel model = AutoModel.from_pretrained("wonwonn/pcagent_human_only_adapter", dtype="auto") - Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use wonwonn/pcagent_human_only_adapter with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "wonwonn/pcagent_human_only_adapter" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "wonwonn/pcagent_human_only_adapter", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker
docker model run hf.co/wonwonn/pcagent_human_only_adapter
- SGLang
How to use wonwonn/pcagent_human_only_adapter with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "wonwonn/pcagent_human_only_adapter" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "wonwonn/pcagent_human_only_adapter", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "wonwonn/pcagent_human_only_adapter" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "wonwonn/pcagent_human_only_adapter", "messages": [ { "role": "user", "content": "What is the capital of France?" } ] }' - Docker Model Runner
How to use wonwonn/pcagent_human_only_adapter with Docker Model Runner:
docker model run hf.co/wonwonn/pcagent_human_only_adapter
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 1388, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.001440922190201729, | |
| "grad_norm": 0.7939718961715698, | |
| "learning_rate": 0.0, | |
| "loss": 1.1426048278808594, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.002881844380403458, | |
| "grad_norm": 0.7847036123275757, | |
| "learning_rate": 2.8571428571428575e-07, | |
| "loss": 1.2942605018615723, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.004322766570605188, | |
| "grad_norm": 0.8253518342971802, | |
| "learning_rate": 5.714285714285715e-07, | |
| "loss": 1.283416748046875, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.005763688760806916, | |
| "grad_norm": 0.8199485540390015, | |
| "learning_rate": 8.571428571428572e-07, | |
| "loss": 1.3191412687301636, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.007204610951008645, | |
| "grad_norm": 0.7191706895828247, | |
| "learning_rate": 1.142857142857143e-06, | |
| "loss": 1.127804160118103, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.008645533141210375, | |
| "grad_norm": 0.7180572748184204, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 1.280735969543457, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.010086455331412104, | |
| "grad_norm": 0.7501729726791382, | |
| "learning_rate": 1.7142857142857145e-06, | |
| "loss": 1.1184178590774536, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.011527377521613832, | |
| "grad_norm": 0.7057927846908569, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 1.2702372074127197, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.012968299711815562, | |
| "grad_norm": 0.6871187686920166, | |
| "learning_rate": 2.285714285714286e-06, | |
| "loss": 1.2448116540908813, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.01440922190201729, | |
| "grad_norm": 0.8333551287651062, | |
| "learning_rate": 2.571428571428571e-06, | |
| "loss": 1.1602749824523926, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01585014409221902, | |
| "grad_norm": 0.7824198007583618, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 1.3133487701416016, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.01729106628242075, | |
| "grad_norm": 0.7421954274177551, | |
| "learning_rate": 3.142857142857143e-06, | |
| "loss": 1.4090148210525513, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.018731988472622477, | |
| "grad_norm": 0.7107607126235962, | |
| "learning_rate": 3.428571428571429e-06, | |
| "loss": 1.0468783378601074, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.020172910662824207, | |
| "grad_norm": 0.7520753145217896, | |
| "learning_rate": 3.7142857142857146e-06, | |
| "loss": 1.2376011610031128, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.021613832853025938, | |
| "grad_norm": 0.7466248273849487, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 1.1968696117401123, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.023054755043227664, | |
| "grad_norm": 0.6783359050750732, | |
| "learning_rate": 4.2857142857142855e-06, | |
| "loss": 1.2191252708435059, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.024495677233429394, | |
| "grad_norm": 0.6667131781578064, | |
| "learning_rate": 4.571428571428572e-06, | |
| "loss": 1.088866114616394, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.025936599423631124, | |
| "grad_norm": 0.6895946860313416, | |
| "learning_rate": 4.857142857142858e-06, | |
| "loss": 1.3629319667816162, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.027377521613832854, | |
| "grad_norm": 0.6927962303161621, | |
| "learning_rate": 5.142857142857142e-06, | |
| "loss": 1.1627918481826782, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.02881844380403458, | |
| "grad_norm": 0.5862833857536316, | |
| "learning_rate": 5.428571428571429e-06, | |
| "loss": 0.9320468902587891, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.03025936599423631, | |
| "grad_norm": 0.5858862400054932, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 1.1411528587341309, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.03170028818443804, | |
| "grad_norm": 0.691691517829895, | |
| "learning_rate": 6e-06, | |
| "loss": 0.9531513452529907, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.03314121037463977, | |
| "grad_norm": 0.49931254982948303, | |
| "learning_rate": 6.285714285714286e-06, | |
| "loss": 0.949053943157196, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.0345821325648415, | |
| "grad_norm": 0.5515104532241821, | |
| "learning_rate": 6.571428571428572e-06, | |
| "loss": 1.3075839281082153, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.03602305475504323, | |
| "grad_norm": 0.6252418160438538, | |
| "learning_rate": 6.857142857142858e-06, | |
| "loss": 1.134469985961914, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.037463976945244955, | |
| "grad_norm": 0.4665541648864746, | |
| "learning_rate": 7.1428571428571436e-06, | |
| "loss": 0.9274802207946777, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.03890489913544669, | |
| "grad_norm": 0.47294583916664124, | |
| "learning_rate": 7.428571428571429e-06, | |
| "loss": 0.9820688962936401, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.040345821325648415, | |
| "grad_norm": 0.4679860770702362, | |
| "learning_rate": 7.714285714285716e-06, | |
| "loss": 1.0257033109664917, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.04178674351585014, | |
| "grad_norm": 0.44461655616760254, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.8461464643478394, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.043227665706051875, | |
| "grad_norm": 0.5527508854866028, | |
| "learning_rate": 8.285714285714287e-06, | |
| "loss": 1.014765739440918, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0446685878962536, | |
| "grad_norm": 0.44049155712127686, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 1.056575059890747, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.04610951008645533, | |
| "grad_norm": 0.46883395314216614, | |
| "learning_rate": 8.857142857142858e-06, | |
| "loss": 1.078040599822998, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.04755043227665706, | |
| "grad_norm": 0.40662866830825806, | |
| "learning_rate": 9.142857142857144e-06, | |
| "loss": 1.0002977848052979, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.04899135446685879, | |
| "grad_norm": 0.4658549129962921, | |
| "learning_rate": 9.42857142857143e-06, | |
| "loss": 1.2170262336730957, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.05043227665706052, | |
| "grad_norm": 0.43307650089263916, | |
| "learning_rate": 9.714285714285715e-06, | |
| "loss": 1.0410033464431763, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.05187319884726225, | |
| "grad_norm": 0.4385557174682617, | |
| "learning_rate": 1e-05, | |
| "loss": 1.0051664113998413, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.053314121037463975, | |
| "grad_norm": 0.376889705657959, | |
| "learning_rate": 1.0285714285714285e-05, | |
| "loss": 0.9119026064872742, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.05475504322766571, | |
| "grad_norm": 0.4563588798046112, | |
| "learning_rate": 1.0571428571428572e-05, | |
| "loss": 1.0321323871612549, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.056195965417867436, | |
| "grad_norm": 0.42952460050582886, | |
| "learning_rate": 1.0857142857142858e-05, | |
| "loss": 1.1022179126739502, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.05763688760806916, | |
| "grad_norm": 0.4148353934288025, | |
| "learning_rate": 1.1142857142857143e-05, | |
| "loss": 1.003936767578125, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.059077809798270896, | |
| "grad_norm": 0.4105982482433319, | |
| "learning_rate": 1.1428571428571429e-05, | |
| "loss": 0.9413349628448486, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.06051873198847262, | |
| "grad_norm": 0.4209625720977783, | |
| "learning_rate": 1.1714285714285716e-05, | |
| "loss": 1.0281121730804443, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.06195965417867435, | |
| "grad_norm": 0.4445074498653412, | |
| "learning_rate": 1.2e-05, | |
| "loss": 1.103991985321045, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.06340057636887608, | |
| "grad_norm": 0.4571291506290436, | |
| "learning_rate": 1.2285714285714288e-05, | |
| "loss": 1.130021572113037, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.06484149855907781, | |
| "grad_norm": 0.3988986909389496, | |
| "learning_rate": 1.2571428571428572e-05, | |
| "loss": 0.9458773136138916, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.06628242074927954, | |
| "grad_norm": 0.44278568029403687, | |
| "learning_rate": 1.2857142857142859e-05, | |
| "loss": 0.8265559673309326, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.06772334293948126, | |
| "grad_norm": 0.4381110966205597, | |
| "learning_rate": 1.3142857142857145e-05, | |
| "loss": 0.8585218191146851, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.069164265129683, | |
| "grad_norm": 0.436262845993042, | |
| "learning_rate": 1.3428571428571429e-05, | |
| "loss": 1.0744308233261108, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.07060518731988473, | |
| "grad_norm": 0.38992249965667725, | |
| "learning_rate": 1.3714285714285716e-05, | |
| "loss": 0.7727986574172974, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.07204610951008646, | |
| "grad_norm": 0.4699057340621948, | |
| "learning_rate": 1.4e-05, | |
| "loss": 1.107350468635559, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07348703170028818, | |
| "grad_norm": 0.42696720361709595, | |
| "learning_rate": 1.4285714285714287e-05, | |
| "loss": 0.8284908533096313, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.07492795389048991, | |
| "grad_norm": 0.41258543729782104, | |
| "learning_rate": 1.4571428571428573e-05, | |
| "loss": 0.9984976649284363, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.07636887608069164, | |
| "grad_norm": 0.4343370795249939, | |
| "learning_rate": 1.4857142857142858e-05, | |
| "loss": 1.0807418823242188, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.07780979827089338, | |
| "grad_norm": 0.41640159487724304, | |
| "learning_rate": 1.5142857142857144e-05, | |
| "loss": 0.9495835304260254, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.0792507204610951, | |
| "grad_norm": 0.40046796202659607, | |
| "learning_rate": 1.542857142857143e-05, | |
| "loss": 0.8888975381851196, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.08069164265129683, | |
| "grad_norm": 0.5632781386375427, | |
| "learning_rate": 1.5714285714285715e-05, | |
| "loss": 0.7974977493286133, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.08213256484149856, | |
| "grad_norm": 0.42231452465057373, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.9832745790481567, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.08357348703170028, | |
| "grad_norm": 0.40361636877059937, | |
| "learning_rate": 1.6285714285714287e-05, | |
| "loss": 0.9870190620422363, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.08501440922190202, | |
| "grad_norm": 0.39357513189315796, | |
| "learning_rate": 1.6571428571428574e-05, | |
| "loss": 0.8166898488998413, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.08645533141210375, | |
| "grad_norm": 0.41122061014175415, | |
| "learning_rate": 1.6857142857142858e-05, | |
| "loss": 0.7634358406066895, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.08789625360230548, | |
| "grad_norm": 0.46859362721443176, | |
| "learning_rate": 1.7142857142857142e-05, | |
| "loss": 0.9240067005157471, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.0893371757925072, | |
| "grad_norm": 0.45468568801879883, | |
| "learning_rate": 1.742857142857143e-05, | |
| "loss": 0.8535733819007874, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.09077809798270893, | |
| "grad_norm": 0.4963301718235016, | |
| "learning_rate": 1.7714285714285717e-05, | |
| "loss": 1.0492768287658691, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.09221902017291066, | |
| "grad_norm": 0.4315282106399536, | |
| "learning_rate": 1.8e-05, | |
| "loss": 1.057793378829956, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.0936599423631124, | |
| "grad_norm": 0.47847360372543335, | |
| "learning_rate": 1.8285714285714288e-05, | |
| "loss": 1.083353877067566, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.09510086455331412, | |
| "grad_norm": 0.4626167118549347, | |
| "learning_rate": 1.8571428571428575e-05, | |
| "loss": 1.078417420387268, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.09654178674351585, | |
| "grad_norm": 0.5010611414909363, | |
| "learning_rate": 1.885714285714286e-05, | |
| "loss": 0.9978217482566833, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.09798270893371758, | |
| "grad_norm": 0.42384764552116394, | |
| "learning_rate": 1.9142857142857146e-05, | |
| "loss": 0.8274828195571899, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.0994236311239193, | |
| "grad_norm": 0.5382806062698364, | |
| "learning_rate": 1.942857142857143e-05, | |
| "loss": 0.8650892972946167, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.10086455331412104, | |
| "grad_norm": 0.43172308802604675, | |
| "learning_rate": 1.9714285714285718e-05, | |
| "loss": 0.9850153923034668, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.10230547550432277, | |
| "grad_norm": 0.4458475410938263, | |
| "learning_rate": 2e-05, | |
| "loss": 0.8429934978485107, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.1037463976945245, | |
| "grad_norm": 0.4776175916194916, | |
| "learning_rate": 1.999997159212662e-05, | |
| "loss": 0.9614291191101074, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.10518731988472622, | |
| "grad_norm": 0.43584567308425903, | |
| "learning_rate": 1.9999886368667875e-05, | |
| "loss": 0.7892089486122131, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.10662824207492795, | |
| "grad_norm": 0.45101606845855713, | |
| "learning_rate": 1.9999744330107972e-05, | |
| "loss": 1.1100192070007324, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.10806916426512968, | |
| "grad_norm": 0.467498242855072, | |
| "learning_rate": 1.999954547725391e-05, | |
| "loss": 0.9660458564758301, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.10951008645533142, | |
| "grad_norm": 0.49766960740089417, | |
| "learning_rate": 1.9999289811235492e-05, | |
| "loss": 1.0705103874206543, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.11095100864553314, | |
| "grad_norm": 0.43898335099220276, | |
| "learning_rate": 1.9998977333505298e-05, | |
| "loss": 0.8018485903739929, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.11239193083573487, | |
| "grad_norm": 0.44093072414398193, | |
| "learning_rate": 1.9998608045838696e-05, | |
| "loss": 0.9763551950454712, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.1138328530259366, | |
| "grad_norm": 0.5379366278648376, | |
| "learning_rate": 1.9998181950333825e-05, | |
| "loss": 0.9606471061706543, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.11527377521613832, | |
| "grad_norm": 0.5516687035560608, | |
| "learning_rate": 1.999769904941157e-05, | |
| "loss": 1.0976412296295166, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.11671469740634005, | |
| "grad_norm": 0.48758774995803833, | |
| "learning_rate": 1.9997159345815577e-05, | |
| "loss": 0.9517735242843628, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.11815561959654179, | |
| "grad_norm": 0.4559188485145569, | |
| "learning_rate": 1.9996562842612208e-05, | |
| "loss": 0.9503711462020874, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.11959654178674352, | |
| "grad_norm": 0.4746760129928589, | |
| "learning_rate": 1.999590954319054e-05, | |
| "loss": 0.979506254196167, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.12103746397694524, | |
| "grad_norm": 0.43155157566070557, | |
| "learning_rate": 1.9995199451262348e-05, | |
| "loss": 0.87384033203125, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.12247838616714697, | |
| "grad_norm": 0.49892765283584595, | |
| "learning_rate": 1.999443257086206e-05, | |
| "loss": 0.874519944190979, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.1239193083573487, | |
| "grad_norm": 0.4722626507282257, | |
| "learning_rate": 1.9993608906346778e-05, | |
| "loss": 0.9120929837226868, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.12536023054755044, | |
| "grad_norm": 0.5804750323295593, | |
| "learning_rate": 1.9992728462396207e-05, | |
| "loss": 0.9798381924629211, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.12680115273775217, | |
| "grad_norm": 0.5119799375534058, | |
| "learning_rate": 1.9991791244012654e-05, | |
| "loss": 0.9007123708724976, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.1282420749279539, | |
| "grad_norm": 0.529379665851593, | |
| "learning_rate": 1.9990797256521e-05, | |
| "loss": 0.9782993793487549, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.12968299711815562, | |
| "grad_norm": 0.4421041011810303, | |
| "learning_rate": 1.9989746505568655e-05, | |
| "loss": 0.8241182565689087, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.13112391930835735, | |
| "grad_norm": 0.5022783279418945, | |
| "learning_rate": 1.998863899712554e-05, | |
| "loss": 1.0301092863082886, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.13256484149855907, | |
| "grad_norm": 0.4944583773612976, | |
| "learning_rate": 1.998747473748405e-05, | |
| "loss": 0.9057658910751343, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.1340057636887608, | |
| "grad_norm": 0.5021255016326904, | |
| "learning_rate": 1.9986253733259004e-05, | |
| "loss": 0.994688868522644, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.13544668587896252, | |
| "grad_norm": 0.5356181859970093, | |
| "learning_rate": 1.998497599138764e-05, | |
| "loss": 0.921332061290741, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.13688760806916425, | |
| "grad_norm": 0.5648020505905151, | |
| "learning_rate": 1.9983641519129534e-05, | |
| "loss": 0.8739620447158813, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.138328530259366, | |
| "grad_norm": 0.46064072847366333, | |
| "learning_rate": 1.99822503240666e-05, | |
| "loss": 0.8407166600227356, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.13976945244956773, | |
| "grad_norm": 0.5033822059631348, | |
| "learning_rate": 1.998080241410301e-05, | |
| "loss": 0.9476931095123291, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.14121037463976946, | |
| "grad_norm": 0.5009251236915588, | |
| "learning_rate": 1.997929779746517e-05, | |
| "loss": 0.9408062696456909, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.14265129682997119, | |
| "grad_norm": 0.5032044649124146, | |
| "learning_rate": 1.997773648270168e-05, | |
| "loss": 0.9481003284454346, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.1440922190201729, | |
| "grad_norm": 0.5312591791152954, | |
| "learning_rate": 1.997611847868326e-05, | |
| "loss": 0.8620598316192627, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.14553314121037464, | |
| "grad_norm": 0.49578797817230225, | |
| "learning_rate": 1.9974443794602723e-05, | |
| "loss": 0.8824926018714905, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.14697406340057637, | |
| "grad_norm": 0.49957969784736633, | |
| "learning_rate": 1.9972712439974912e-05, | |
| "loss": 0.8620983362197876, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.1484149855907781, | |
| "grad_norm": 0.5200111865997314, | |
| "learning_rate": 1.9970924424636645e-05, | |
| "loss": 1.0327873229980469, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.14985590778097982, | |
| "grad_norm": 0.5515114068984985, | |
| "learning_rate": 1.996907975874667e-05, | |
| "loss": 0.9890369176864624, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.15129682997118155, | |
| "grad_norm": 0.621155858039856, | |
| "learning_rate": 1.9967178452785586e-05, | |
| "loss": 1.0814683437347412, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.15273775216138327, | |
| "grad_norm": 0.5430831909179688, | |
| "learning_rate": 1.9965220517555814e-05, | |
| "loss": 0.9082866907119751, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.15417867435158503, | |
| "grad_norm": 0.5123117566108704, | |
| "learning_rate": 1.9963205964181503e-05, | |
| "loss": 0.8269040584564209, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.15561959654178675, | |
| "grad_norm": 0.475583553314209, | |
| "learning_rate": 1.996113480410849e-05, | |
| "loss": 0.9455451965332031, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.15706051873198848, | |
| "grad_norm": 0.48762381076812744, | |
| "learning_rate": 1.9959007049104223e-05, | |
| "loss": 0.7549535036087036, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.1585014409221902, | |
| "grad_norm": 0.5038346648216248, | |
| "learning_rate": 1.9956822711257708e-05, | |
| "loss": 0.7314675450325012, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.15994236311239193, | |
| "grad_norm": 0.5289925932884216, | |
| "learning_rate": 1.995458180297942e-05, | |
| "loss": 0.8430821299552917, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.16138328530259366, | |
| "grad_norm": 0.48994776606559753, | |
| "learning_rate": 1.9952284337001238e-05, | |
| "loss": 0.8332911729812622, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.1628242074927954, | |
| "grad_norm": 0.536970853805542, | |
| "learning_rate": 1.9949930326376403e-05, | |
| "loss": 0.8364700078964233, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.1642651296829971, | |
| "grad_norm": 0.5265873670578003, | |
| "learning_rate": 1.994751978447939e-05, | |
| "loss": 0.9724129438400269, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.16570605187319884, | |
| "grad_norm": 0.515876293182373, | |
| "learning_rate": 1.994505272500588e-05, | |
| "loss": 0.8738645315170288, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.16714697406340057, | |
| "grad_norm": 0.49826738238334656, | |
| "learning_rate": 1.9942529161972646e-05, | |
| "loss": 0.7798672318458557, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.1685878962536023, | |
| "grad_norm": 0.4744509756565094, | |
| "learning_rate": 1.993994910971751e-05, | |
| "loss": 0.779198944568634, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.17002881844380405, | |
| "grad_norm": 0.5166738033294678, | |
| "learning_rate": 1.9937312582899224e-05, | |
| "loss": 0.8262094855308533, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.17146974063400577, | |
| "grad_norm": 0.5266196131706238, | |
| "learning_rate": 1.993461959649742e-05, | |
| "loss": 0.9057078957557678, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.1729106628242075, | |
| "grad_norm": 0.46664103865623474, | |
| "learning_rate": 1.9931870165812492e-05, | |
| "loss": 0.8845268487930298, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.17435158501440923, | |
| "grad_norm": 0.49491918087005615, | |
| "learning_rate": 1.9929064306465543e-05, | |
| "loss": 1.0086660385131836, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.17579250720461095, | |
| "grad_norm": 0.6369159817695618, | |
| "learning_rate": 1.992620203439827e-05, | |
| "loss": 0.8299688100814819, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.17723342939481268, | |
| "grad_norm": 0.5914552211761475, | |
| "learning_rate": 1.9923283365872886e-05, | |
| "loss": 0.6999752521514893, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.1786743515850144, | |
| "grad_norm": 0.5571977496147156, | |
| "learning_rate": 1.9920308317472023e-05, | |
| "loss": 0.899816632270813, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.18011527377521613, | |
| "grad_norm": 0.5898154377937317, | |
| "learning_rate": 1.9917276906098643e-05, | |
| "loss": 1.0478678941726685, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.18155619596541786, | |
| "grad_norm": 0.5443904399871826, | |
| "learning_rate": 1.991418914897593e-05, | |
| "loss": 0.8770928978919983, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.1829971181556196, | |
| "grad_norm": 0.43962040543556213, | |
| "learning_rate": 1.9911045063647214e-05, | |
| "loss": 0.7189308404922485, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.1844380403458213, | |
| "grad_norm": 0.46299442648887634, | |
| "learning_rate": 1.9907844667975847e-05, | |
| "loss": 0.7834997177124023, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.18587896253602307, | |
| "grad_norm": 0.5070964694023132, | |
| "learning_rate": 1.9904587980145117e-05, | |
| "loss": 0.9666174650192261, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.1873198847262248, | |
| "grad_norm": 0.5512601733207703, | |
| "learning_rate": 1.990127501865814e-05, | |
| "loss": 1.0248420238494873, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.18876080691642652, | |
| "grad_norm": 0.4986175000667572, | |
| "learning_rate": 1.989790580233775e-05, | |
| "loss": 0.8395522832870483, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.19020172910662825, | |
| "grad_norm": 0.5736032724380493, | |
| "learning_rate": 1.989448035032641e-05, | |
| "loss": 0.9717695713043213, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.19164265129682997, | |
| "grad_norm": 0.560399055480957, | |
| "learning_rate": 1.989099868208607e-05, | |
| "loss": 0.968644917011261, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.1930835734870317, | |
| "grad_norm": 0.49599337577819824, | |
| "learning_rate": 1.9887460817398093e-05, | |
| "loss": 0.9145469069480896, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.19452449567723343, | |
| "grad_norm": 0.5436801910400391, | |
| "learning_rate": 1.9883866776363123e-05, | |
| "loss": 1.0251821279525757, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.19596541786743515, | |
| "grad_norm": 0.5881351232528687, | |
| "learning_rate": 1.9880216579400972e-05, | |
| "loss": 0.9223377108573914, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.19740634005763688, | |
| "grad_norm": 0.517955482006073, | |
| "learning_rate": 1.9876510247250506e-05, | |
| "loss": 0.8712332248687744, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.1988472622478386, | |
| "grad_norm": 0.5075203776359558, | |
| "learning_rate": 1.9872747800969526e-05, | |
| "loss": 0.9759121537208557, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.20028818443804033, | |
| "grad_norm": 0.4947361946105957, | |
| "learning_rate": 1.9868929261934657e-05, | |
| "loss": 0.920539140701294, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.2017291066282421, | |
| "grad_norm": 0.693020761013031, | |
| "learning_rate": 1.986505465184121e-05, | |
| "loss": 0.9794489145278931, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.20317002881844382, | |
| "grad_norm": 0.5060868859291077, | |
| "learning_rate": 1.986112399270307e-05, | |
| "loss": 0.9132547974586487, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.20461095100864554, | |
| "grad_norm": 0.5373198390007019, | |
| "learning_rate": 1.985713730685257e-05, | |
| "loss": 0.9875497817993164, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.20605187319884727, | |
| "grad_norm": 0.4931413531303406, | |
| "learning_rate": 1.985309461694037e-05, | |
| "loss": 0.8547732830047607, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.207492795389049, | |
| "grad_norm": 0.5622901916503906, | |
| "learning_rate": 1.9848995945935305e-05, | |
| "loss": 0.7855159640312195, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.20893371757925072, | |
| "grad_norm": 0.6368292570114136, | |
| "learning_rate": 1.984484131712429e-05, | |
| "loss": 0.8236104249954224, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.21037463976945245, | |
| "grad_norm": 0.4789673089981079, | |
| "learning_rate": 1.9840630754112152e-05, | |
| "loss": 0.7789024114608765, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.21181556195965417, | |
| "grad_norm": 0.5207936763763428, | |
| "learning_rate": 1.9836364280821522e-05, | |
| "loss": 0.8215268850326538, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.2132564841498559, | |
| "grad_norm": 0.5288500189781189, | |
| "learning_rate": 1.9832041921492688e-05, | |
| "loss": 0.883397102355957, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.21469740634005763, | |
| "grad_norm": 0.4870454967021942, | |
| "learning_rate": 1.9827663700683454e-05, | |
| "loss": 0.903971791267395, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.21613832853025935, | |
| "grad_norm": 0.468478798866272, | |
| "learning_rate": 1.982322964326901e-05, | |
| "loss": 0.7198023796081543, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.21757925072046108, | |
| "grad_norm": 0.5701258182525635, | |
| "learning_rate": 1.9818739774441784e-05, | |
| "loss": 0.8818264007568359, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.21902017291066284, | |
| "grad_norm": 0.5318727493286133, | |
| "learning_rate": 1.98141941197113e-05, | |
| "loss": 0.7901202440261841, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.22046109510086456, | |
| "grad_norm": 0.6329215168952942, | |
| "learning_rate": 1.980959270490404e-05, | |
| "loss": 0.9011144638061523, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.2219020172910663, | |
| "grad_norm": 0.5430089235305786, | |
| "learning_rate": 1.980493555616328e-05, | |
| "loss": 0.8788041472434998, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.22334293948126802, | |
| "grad_norm": 0.5590965747833252, | |
| "learning_rate": 1.980022269994896e-05, | |
| "loss": 0.9463634490966797, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.22478386167146974, | |
| "grad_norm": 0.5332587361335754, | |
| "learning_rate": 1.9795454163037523e-05, | |
| "loss": 0.8602564334869385, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.22622478386167147, | |
| "grad_norm": 0.5108247399330139, | |
| "learning_rate": 1.9790629972521772e-05, | |
| "loss": 0.956876277923584, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.2276657060518732, | |
| "grad_norm": 0.48440778255462646, | |
| "learning_rate": 1.97857501558107e-05, | |
| "loss": 0.8002164363861084, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.22910662824207492, | |
| "grad_norm": 0.5165001749992371, | |
| "learning_rate": 1.9780814740629357e-05, | |
| "loss": 0.7904690504074097, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.23054755043227665, | |
| "grad_norm": 0.5920788049697876, | |
| "learning_rate": 1.9775823755018665e-05, | |
| "loss": 1.004716157913208, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.23198847262247838, | |
| "grad_norm": 0.46533599495887756, | |
| "learning_rate": 1.9770777227335292e-05, | |
| "loss": 0.7796809673309326, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.2334293948126801, | |
| "grad_norm": 0.5575913786888123, | |
| "learning_rate": 1.976567518625145e-05, | |
| "loss": 0.7896379232406616, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.23487031700288186, | |
| "grad_norm": 0.5064666271209717, | |
| "learning_rate": 1.976051766075477e-05, | |
| "loss": 0.8482154607772827, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.23631123919308358, | |
| "grad_norm": 0.5755187273025513, | |
| "learning_rate": 1.9755304680148125e-05, | |
| "loss": 0.9588966965675354, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.2377521613832853, | |
| "grad_norm": 0.5244006514549255, | |
| "learning_rate": 1.9750036274049447e-05, | |
| "loss": 0.8246345520019531, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.23919308357348704, | |
| "grad_norm": 0.5705826878547668, | |
| "learning_rate": 1.974471247239158e-05, | |
| "loss": 0.9529173374176025, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.24063400576368876, | |
| "grad_norm": 0.5782693028450012, | |
| "learning_rate": 1.97393333054221e-05, | |
| "loss": 0.9156125783920288, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.2420749279538905, | |
| "grad_norm": 0.5548809170722961, | |
| "learning_rate": 1.9733898803703145e-05, | |
| "loss": 0.87775719165802, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.24351585014409222, | |
| "grad_norm": 0.5045956373214722, | |
| "learning_rate": 1.972840899811125e-05, | |
| "loss": 0.8338059782981873, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.24495677233429394, | |
| "grad_norm": 0.4768422245979309, | |
| "learning_rate": 1.9722863919837146e-05, | |
| "loss": 0.8089622259140015, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.24639769452449567, | |
| "grad_norm": 0.5043761134147644, | |
| "learning_rate": 1.9717263600385614e-05, | |
| "loss": 0.8071548938751221, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.2478386167146974, | |
| "grad_norm": 0.449352502822876, | |
| "learning_rate": 1.9711608071575285e-05, | |
| "loss": 0.7377144694328308, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.24927953890489912, | |
| "grad_norm": 0.5425634980201721, | |
| "learning_rate": 1.970589736553847e-05, | |
| "loss": 0.9347266554832458, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.2507204610951009, | |
| "grad_norm": 0.5423866510391235, | |
| "learning_rate": 1.970013151472097e-05, | |
| "loss": 0.9006168842315674, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.2521613832853026, | |
| "grad_norm": 0.46786877512931824, | |
| "learning_rate": 1.96943105518819e-05, | |
| "loss": 0.8953101634979248, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.25360230547550433, | |
| "grad_norm": 0.5013384222984314, | |
| "learning_rate": 1.968843451009349e-05, | |
| "loss": 0.9531070590019226, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.25504322766570603, | |
| "grad_norm": 0.5079538226127625, | |
| "learning_rate": 1.9682503422740915e-05, | |
| "loss": 0.7452487945556641, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.2564841498559078, | |
| "grad_norm": 0.48233309388160706, | |
| "learning_rate": 1.967651732352209e-05, | |
| "loss": 0.78824782371521, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.2579250720461095, | |
| "grad_norm": 0.4961184859275818, | |
| "learning_rate": 1.9670476246447484e-05, | |
| "loss": 0.7305347919464111, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.25936599423631124, | |
| "grad_norm": 0.5201149582862854, | |
| "learning_rate": 1.966438022583993e-05, | |
| "loss": 0.896546483039856, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.260806916426513, | |
| "grad_norm": 0.5316110849380493, | |
| "learning_rate": 1.9658229296334416e-05, | |
| "loss": 0.9205869436264038, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.2622478386167147, | |
| "grad_norm": 0.4991670548915863, | |
| "learning_rate": 1.9652023492877915e-05, | |
| "loss": 0.7877059578895569, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.26368876080691644, | |
| "grad_norm": 0.5440250635147095, | |
| "learning_rate": 1.964576285072916e-05, | |
| "loss": 0.7989544868469238, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.26512968299711814, | |
| "grad_norm": 0.5110585689544678, | |
| "learning_rate": 1.963944740545846e-05, | |
| "loss": 0.861824631690979, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.2665706051873199, | |
| "grad_norm": 0.5013352632522583, | |
| "learning_rate": 1.9633077192947486e-05, | |
| "loss": 0.7943878173828125, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.2680115273775216, | |
| "grad_norm": 0.4938945472240448, | |
| "learning_rate": 1.9626652249389076e-05, | |
| "loss": 0.7950688600540161, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.26945244956772335, | |
| "grad_norm": 0.5503535270690918, | |
| "learning_rate": 1.9620172611287028e-05, | |
| "loss": 0.7456330060958862, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.27089337175792505, | |
| "grad_norm": 0.5915765166282654, | |
| "learning_rate": 1.9613638315455888e-05, | |
| "loss": 0.803912878036499, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.2723342939481268, | |
| "grad_norm": 0.5289369821548462, | |
| "learning_rate": 1.9607049399020746e-05, | |
| "loss": 0.8069210052490234, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.2737752161383285, | |
| "grad_norm": 0.6146844029426575, | |
| "learning_rate": 1.9600405899417026e-05, | |
| "loss": 0.9639400243759155, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.27521613832853026, | |
| "grad_norm": 0.5518732070922852, | |
| "learning_rate": 1.9593707854390263e-05, | |
| "loss": 0.8917955160140991, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.276657060518732, | |
| "grad_norm": 0.5757451057434082, | |
| "learning_rate": 1.9586955301995903e-05, | |
| "loss": 1.0030066967010498, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.2780979827089337, | |
| "grad_norm": 0.6039057374000549, | |
| "learning_rate": 1.9580148280599075e-05, | |
| "loss": 0.770416796207428, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.27953890489913547, | |
| "grad_norm": 0.5454704761505127, | |
| "learning_rate": 1.957328682887438e-05, | |
| "loss": 0.7988390326499939, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.28097982708933716, | |
| "grad_norm": 0.5586422085762024, | |
| "learning_rate": 1.9566370985805666e-05, | |
| "loss": 0.8730517625808716, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.2824207492795389, | |
| "grad_norm": 0.5465781092643738, | |
| "learning_rate": 1.9559400790685813e-05, | |
| "loss": 0.984703779220581, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.2838616714697406, | |
| "grad_norm": 0.5505930185317993, | |
| "learning_rate": 1.9552376283116508e-05, | |
| "loss": 0.8712029457092285, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.28530259365994237, | |
| "grad_norm": 0.49047884345054626, | |
| "learning_rate": 1.9545297503008014e-05, | |
| "loss": 0.9437016844749451, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.28674351585014407, | |
| "grad_norm": 0.5203781127929688, | |
| "learning_rate": 1.953816449057895e-05, | |
| "loss": 0.9115222096443176, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.2881844380403458, | |
| "grad_norm": 0.5116919279098511, | |
| "learning_rate": 1.9530977286356053e-05, | |
| "loss": 0.8222418427467346, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2896253602305475, | |
| "grad_norm": 0.5122188329696655, | |
| "learning_rate": 1.9523735931173964e-05, | |
| "loss": 0.7692494988441467, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.2910662824207493, | |
| "grad_norm": 0.48403820395469666, | |
| "learning_rate": 1.951644046617499e-05, | |
| "loss": 0.7008178234100342, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.29250720461095103, | |
| "grad_norm": 0.5577651262283325, | |
| "learning_rate": 1.950909093280885e-05, | |
| "loss": 0.9718255996704102, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.29394812680115273, | |
| "grad_norm": 0.5913541913032532, | |
| "learning_rate": 1.9501687372832466e-05, | |
| "loss": 1.0085289478302002, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.2953890489913545, | |
| "grad_norm": 0.6354373097419739, | |
| "learning_rate": 1.9494229828309724e-05, | |
| "loss": 0.9526057243347168, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.2968299711815562, | |
| "grad_norm": 0.5196180939674377, | |
| "learning_rate": 1.948671834161122e-05, | |
| "loss": 0.9512363076210022, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.29827089337175794, | |
| "grad_norm": 0.49973103404045105, | |
| "learning_rate": 1.947915295541402e-05, | |
| "loss": 0.8308489322662354, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.29971181556195964, | |
| "grad_norm": 0.5216118693351746, | |
| "learning_rate": 1.947153371270144e-05, | |
| "loss": 0.8502541780471802, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.3011527377521614, | |
| "grad_norm": 0.6129580140113831, | |
| "learning_rate": 1.946386065676277e-05, | |
| "loss": 0.8418374061584473, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.3025936599423631, | |
| "grad_norm": 0.5305296182632446, | |
| "learning_rate": 1.945613383119305e-05, | |
| "loss": 0.8838613033294678, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.30403458213256485, | |
| "grad_norm": 0.5104044079780579, | |
| "learning_rate": 1.944835327989282e-05, | |
| "loss": 0.8677416443824768, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.30547550432276654, | |
| "grad_norm": 0.5255080461502075, | |
| "learning_rate": 1.944051904706786e-05, | |
| "loss": 0.8306810855865479, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.3069164265129683, | |
| "grad_norm": 0.5854450464248657, | |
| "learning_rate": 1.9432631177228948e-05, | |
| "loss": 0.9153035879135132, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.30835734870317005, | |
| "grad_norm": 0.6011139154434204, | |
| "learning_rate": 1.942468971519161e-05, | |
| "loss": 0.8749545812606812, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.30979827089337175, | |
| "grad_norm": 0.6026611924171448, | |
| "learning_rate": 1.941669470607585e-05, | |
| "loss": 0.9470349550247192, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.3112391930835735, | |
| "grad_norm": 0.5269956588745117, | |
| "learning_rate": 1.9408646195305914e-05, | |
| "loss": 0.8375416398048401, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.3126801152737752, | |
| "grad_norm": 0.5777774453163147, | |
| "learning_rate": 1.940054422861002e-05, | |
| "loss": 0.8614240884780884, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.31412103746397696, | |
| "grad_norm": 0.5759449601173401, | |
| "learning_rate": 1.939238885202009e-05, | |
| "loss": 0.886568546295166, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.31556195965417866, | |
| "grad_norm": 0.6054496765136719, | |
| "learning_rate": 1.9384180111871502e-05, | |
| "loss": 0.9906665086746216, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.3170028818443804, | |
| "grad_norm": 0.5911257863044739, | |
| "learning_rate": 1.9375918054802836e-05, | |
| "loss": 0.8998844623565674, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3184438040345821, | |
| "grad_norm": 0.5230793356895447, | |
| "learning_rate": 1.936760272775558e-05, | |
| "loss": 0.8659279346466064, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.31988472622478387, | |
| "grad_norm": 0.5517547130584717, | |
| "learning_rate": 1.935923417797389e-05, | |
| "loss": 0.8853185176849365, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.32132564841498557, | |
| "grad_norm": 0.5559066534042358, | |
| "learning_rate": 1.9350812453004303e-05, | |
| "loss": 0.8165359497070312, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.3227665706051873, | |
| "grad_norm": 0.7742547988891602, | |
| "learning_rate": 1.934233760069548e-05, | |
| "loss": 0.9410666227340698, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.3242074927953891, | |
| "grad_norm": 0.5944716334342957, | |
| "learning_rate": 1.933380966919792e-05, | |
| "loss": 0.8204824328422546, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.3256484149855908, | |
| "grad_norm": 0.5238089561462402, | |
| "learning_rate": 1.9325228706963716e-05, | |
| "loss": 0.7781552672386169, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.3270893371757925, | |
| "grad_norm": 0.5538492202758789, | |
| "learning_rate": 1.9316594762746238e-05, | |
| "loss": 0.861137330532074, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.3285302593659942, | |
| "grad_norm": 0.49301236867904663, | |
| "learning_rate": 1.9307907885599883e-05, | |
| "loss": 0.7672471404075623, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.329971181556196, | |
| "grad_norm": 0.5632166266441345, | |
| "learning_rate": 1.9299168124879798e-05, | |
| "loss": 0.9043705463409424, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.3314121037463977, | |
| "grad_norm": 0.517174482345581, | |
| "learning_rate": 1.9290375530241577e-05, | |
| "loss": 0.8089410662651062, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.33285302593659943, | |
| "grad_norm": 0.5550019145011902, | |
| "learning_rate": 1.9281530151641016e-05, | |
| "loss": 0.8886780142784119, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.33429394812680113, | |
| "grad_norm": 0.6222152709960938, | |
| "learning_rate": 1.9272632039333784e-05, | |
| "loss": 0.7788711786270142, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.3357348703170029, | |
| "grad_norm": 0.5116149187088013, | |
| "learning_rate": 1.9263681243875173e-05, | |
| "loss": 0.8461220264434814, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.3371757925072046, | |
| "grad_norm": 0.5942230820655823, | |
| "learning_rate": 1.92546778161198e-05, | |
| "loss": 0.856147289276123, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.33861671469740634, | |
| "grad_norm": 0.5200196504592896, | |
| "learning_rate": 1.9245621807221306e-05, | |
| "loss": 0.7370104789733887, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.3400576368876081, | |
| "grad_norm": 0.4902980625629425, | |
| "learning_rate": 1.9236513268632085e-05, | |
| "loss": 0.8132247924804688, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.3414985590778098, | |
| "grad_norm": 0.5125890374183655, | |
| "learning_rate": 1.922735225210298e-05, | |
| "loss": 0.8135820627212524, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.34293948126801155, | |
| "grad_norm": 0.6545057892799377, | |
| "learning_rate": 1.9218138809682988e-05, | |
| "loss": 0.9990659952163696, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.34438040345821325, | |
| "grad_norm": 0.45519211888313293, | |
| "learning_rate": 1.9208872993718967e-05, | |
| "loss": 0.6385080814361572, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.345821325648415, | |
| "grad_norm": 0.5084793567657471, | |
| "learning_rate": 1.919955485685535e-05, | |
| "loss": 0.8031101226806641, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.3472622478386167, | |
| "grad_norm": 0.5788770318031311, | |
| "learning_rate": 1.9190184452033828e-05, | |
| "loss": 0.8098981380462646, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.34870317002881845, | |
| "grad_norm": 0.5901806354522705, | |
| "learning_rate": 1.9180761832493045e-05, | |
| "loss": 0.770499050617218, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.35014409221902015, | |
| "grad_norm": 0.5531857013702393, | |
| "learning_rate": 1.917128705176833e-05, | |
| "loss": 0.7973406314849854, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.3515850144092219, | |
| "grad_norm": 0.533688485622406, | |
| "learning_rate": 1.9161760163691347e-05, | |
| "loss": 0.8184660077095032, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.3530259365994236, | |
| "grad_norm": 0.6230199933052063, | |
| "learning_rate": 1.915218122238983e-05, | |
| "loss": 0.9237667322158813, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.35446685878962536, | |
| "grad_norm": 0.5710411667823792, | |
| "learning_rate": 1.9142550282287247e-05, | |
| "loss": 0.9484732747077942, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.3559077809798271, | |
| "grad_norm": 0.5010262727737427, | |
| "learning_rate": 1.9132867398102498e-05, | |
| "loss": 0.7892597317695618, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.3573487031700288, | |
| "grad_norm": 0.5981287956237793, | |
| "learning_rate": 1.912313262484962e-05, | |
| "loss": 0.8831825852394104, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.35878962536023057, | |
| "grad_norm": 0.5648021697998047, | |
| "learning_rate": 1.911334601783745e-05, | |
| "loss": 0.7330418825149536, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.36023054755043227, | |
| "grad_norm": 0.540814220905304, | |
| "learning_rate": 1.910350763266933e-05, | |
| "loss": 0.8082142472267151, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.361671469740634, | |
| "grad_norm": 0.5894553065299988, | |
| "learning_rate": 1.9093617525242772e-05, | |
| "loss": 0.9762543439865112, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.3631123919308357, | |
| "grad_norm": 0.5327045321464539, | |
| "learning_rate": 1.9083675751749174e-05, | |
| "loss": 0.7824052572250366, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.3645533141210375, | |
| "grad_norm": 0.5246372818946838, | |
| "learning_rate": 1.907368236867345e-05, | |
| "loss": 0.7279900312423706, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.3659942363112392, | |
| "grad_norm": 0.5794147849082947, | |
| "learning_rate": 1.9063637432793757e-05, | |
| "loss": 0.7523423433303833, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.36743515850144093, | |
| "grad_norm": 0.5827282071113586, | |
| "learning_rate": 1.9053541001181156e-05, | |
| "loss": 0.8881576061248779, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.3688760806916426, | |
| "grad_norm": 0.5458179712295532, | |
| "learning_rate": 1.9043393131199266e-05, | |
| "loss": 0.7404780983924866, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.3703170028818444, | |
| "grad_norm": 0.5116549134254456, | |
| "learning_rate": 1.9033193880503976e-05, | |
| "loss": 0.846668004989624, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.37175792507204614, | |
| "grad_norm": 0.5459802746772766, | |
| "learning_rate": 1.9022943307043085e-05, | |
| "loss": 0.7665458917617798, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.37319884726224783, | |
| "grad_norm": 0.584256649017334, | |
| "learning_rate": 1.9012641469055998e-05, | |
| "loss": 0.9224525690078735, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.3746397694524496, | |
| "grad_norm": 0.5947668552398682, | |
| "learning_rate": 1.9002288425073367e-05, | |
| "loss": 0.8733052611351013, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.3760806916426513, | |
| "grad_norm": 0.5581645369529724, | |
| "learning_rate": 1.8991884233916795e-05, | |
| "loss": 0.9351893663406372, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.37752161383285304, | |
| "grad_norm": 0.5662564039230347, | |
| "learning_rate": 1.8981428954698466e-05, | |
| "loss": 0.841423749923706, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.37896253602305474, | |
| "grad_norm": 0.5291077494621277, | |
| "learning_rate": 1.8970922646820825e-05, | |
| "loss": 0.8083174228668213, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.3804034582132565, | |
| "grad_norm": 0.549354612827301, | |
| "learning_rate": 1.8960365369976256e-05, | |
| "loss": 0.9548866152763367, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.3818443804034582, | |
| "grad_norm": 0.5359464883804321, | |
| "learning_rate": 1.8949757184146706e-05, | |
| "loss": 0.9965292811393738, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.38328530259365995, | |
| "grad_norm": 0.4956778287887573, | |
| "learning_rate": 1.893909814960338e-05, | |
| "loss": 0.7761472463607788, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.38472622478386165, | |
| "grad_norm": 0.6444170475006104, | |
| "learning_rate": 1.8928388326906376e-05, | |
| "loss": 0.9611786603927612, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.3861671469740634, | |
| "grad_norm": 0.5334309935569763, | |
| "learning_rate": 1.8917627776904352e-05, | |
| "loss": 0.7814089059829712, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.38760806916426516, | |
| "grad_norm": 0.5955665707588196, | |
| "learning_rate": 1.8906816560734182e-05, | |
| "loss": 0.7999836206436157, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.38904899135446686, | |
| "grad_norm": 0.5542837381362915, | |
| "learning_rate": 1.889595473982059e-05, | |
| "loss": 0.8167480230331421, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.3904899135446686, | |
| "grad_norm": 0.5896769762039185, | |
| "learning_rate": 1.8885042375875825e-05, | |
| "loss": 0.8907427787780762, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.3919308357348703, | |
| "grad_norm": 0.5406194925308228, | |
| "learning_rate": 1.8874079530899298e-05, | |
| "loss": 0.7174965143203735, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.39337175792507206, | |
| "grad_norm": 0.5626124739646912, | |
| "learning_rate": 1.8863066267177234e-05, | |
| "loss": 0.8048909902572632, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.39481268011527376, | |
| "grad_norm": 0.5179824233055115, | |
| "learning_rate": 1.885200264728231e-05, | |
| "loss": 0.9267027378082275, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.3962536023054755, | |
| "grad_norm": 0.5661771893501282, | |
| "learning_rate": 1.884088873407331e-05, | |
| "loss": 0.9026603698730469, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.3976945244956772, | |
| "grad_norm": 0.5045554637908936, | |
| "learning_rate": 1.882972459069476e-05, | |
| "loss": 0.6470727920532227, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.39913544668587897, | |
| "grad_norm": 0.56135493516922, | |
| "learning_rate": 1.8818510280576577e-05, | |
| "loss": 0.8532092571258545, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.40057636887608067, | |
| "grad_norm": 0.5583396553993225, | |
| "learning_rate": 1.88072458674337e-05, | |
| "loss": 0.9061315059661865, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.4020172910662824, | |
| "grad_norm": 0.5593348741531372, | |
| "learning_rate": 1.8795931415265735e-05, | |
| "loss": 0.8761183023452759, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.4034582132564842, | |
| "grad_norm": 0.5651080012321472, | |
| "learning_rate": 1.8784566988356586e-05, | |
| "loss": 0.8924948573112488, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4048991354466859, | |
| "grad_norm": 0.5161260962486267, | |
| "learning_rate": 1.877315265127409e-05, | |
| "loss": 0.7666558623313904, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.40634005763688763, | |
| "grad_norm": 0.5837459564208984, | |
| "learning_rate": 1.8761688468869658e-05, | |
| "loss": 0.8420302867889404, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.40778097982708933, | |
| "grad_norm": 0.4191261827945709, | |
| "learning_rate": 1.8750174506277902e-05, | |
| "loss": 0.6361753940582275, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.4092219020172911, | |
| "grad_norm": 0.5724524855613708, | |
| "learning_rate": 1.8738610828916255e-05, | |
| "loss": 0.8530267477035522, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.4106628242074928, | |
| "grad_norm": 0.542438268661499, | |
| "learning_rate": 1.8726997502484617e-05, | |
| "loss": 0.8724586963653564, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.41210374639769454, | |
| "grad_norm": 0.6427977085113525, | |
| "learning_rate": 1.8715334592964964e-05, | |
| "loss": 0.9001520276069641, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.41354466858789624, | |
| "grad_norm": 0.5782722234725952, | |
| "learning_rate": 1.8703622166620995e-05, | |
| "loss": 0.7817631959915161, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.414985590778098, | |
| "grad_norm": 0.5602166652679443, | |
| "learning_rate": 1.869186028999773e-05, | |
| "loss": 0.8070363998413086, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.4164265129682997, | |
| "grad_norm": 0.4618314504623413, | |
| "learning_rate": 1.868004902992115e-05, | |
| "loss": 0.7365565299987793, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.41786743515850144, | |
| "grad_norm": 0.5189284682273865, | |
| "learning_rate": 1.8668188453497814e-05, | |
| "loss": 0.7789565324783325, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.41930835734870314, | |
| "grad_norm": 0.5521842241287231, | |
| "learning_rate": 1.865627862811447e-05, | |
| "loss": 0.8625829219818115, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.4207492795389049, | |
| "grad_norm": 0.5609997510910034, | |
| "learning_rate": 1.8644319621437677e-05, | |
| "loss": 0.7133926153182983, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.42219020172910665, | |
| "grad_norm": 0.541661262512207, | |
| "learning_rate": 1.863231150141343e-05, | |
| "loss": 0.8321035504341125, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.42363112391930835, | |
| "grad_norm": 0.5149182677268982, | |
| "learning_rate": 1.8620254336266757e-05, | |
| "loss": 0.874350905418396, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.4250720461095101, | |
| "grad_norm": 0.6094365119934082, | |
| "learning_rate": 1.8608148194501343e-05, | |
| "loss": 0.95909583568573, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.4265129682997118, | |
| "grad_norm": 0.545134425163269, | |
| "learning_rate": 1.8595993144899135e-05, | |
| "loss": 0.8478891849517822, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.42795389048991356, | |
| "grad_norm": 0.5844987630844116, | |
| "learning_rate": 1.858378925651996e-05, | |
| "loss": 0.952475905418396, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.42939481268011526, | |
| "grad_norm": 0.6407363414764404, | |
| "learning_rate": 1.8571536598701114e-05, | |
| "loss": 0.890306293964386, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.430835734870317, | |
| "grad_norm": 0.5291120409965515, | |
| "learning_rate": 1.8559235241056994e-05, | |
| "loss": 0.7820404767990112, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.4322766570605187, | |
| "grad_norm": 0.6523168683052063, | |
| "learning_rate": 1.8546885253478678e-05, | |
| "loss": 0.9190243482589722, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.43371757925072046, | |
| "grad_norm": 0.5536230802536011, | |
| "learning_rate": 1.853448670613354e-05, | |
| "loss": 0.8771055936813354, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.43515850144092216, | |
| "grad_norm": 0.563988208770752, | |
| "learning_rate": 1.8522039669464863e-05, | |
| "loss": 0.9419461488723755, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.4365994236311239, | |
| "grad_norm": 0.5542746186256409, | |
| "learning_rate": 1.8509544214191403e-05, | |
| "loss": 0.8944621086120605, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.43804034582132567, | |
| "grad_norm": 0.5350150465965271, | |
| "learning_rate": 1.8497000411307035e-05, | |
| "loss": 0.8363540768623352, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.43948126801152737, | |
| "grad_norm": 0.6578159928321838, | |
| "learning_rate": 1.8484408332080298e-05, | |
| "loss": 0.8941947221755981, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.4409221902017291, | |
| "grad_norm": 0.5049351453781128, | |
| "learning_rate": 1.847176804805404e-05, | |
| "loss": 0.7112985849380493, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.4423631123919308, | |
| "grad_norm": 0.5391475558280945, | |
| "learning_rate": 1.845907963104497e-05, | |
| "loss": 0.8202729225158691, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.4438040345821326, | |
| "grad_norm": 0.533073365688324, | |
| "learning_rate": 1.844634315314329e-05, | |
| "loss": 0.7154335379600525, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.4452449567723343, | |
| "grad_norm": 0.5108841061592102, | |
| "learning_rate": 1.843355868671224e-05, | |
| "loss": 0.809640109539032, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.44668587896253603, | |
| "grad_norm": 0.5836730003356934, | |
| "learning_rate": 1.8420726304387723e-05, | |
| "loss": 0.8588663339614868, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.44812680115273773, | |
| "grad_norm": 0.553754448890686, | |
| "learning_rate": 1.840784607907788e-05, | |
| "loss": 0.9030488729476929, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.4495677233429395, | |
| "grad_norm": 0.5422321557998657, | |
| "learning_rate": 1.839491808396267e-05, | |
| "loss": 0.9312916994094849, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.4510086455331412, | |
| "grad_norm": 0.571288526058197, | |
| "learning_rate": 1.8381942392493464e-05, | |
| "loss": 0.7808306217193604, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.45244956772334294, | |
| "grad_norm": 0.6292662024497986, | |
| "learning_rate": 1.836891907839262e-05, | |
| "loss": 0.8910583257675171, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.4538904899135447, | |
| "grad_norm": 0.7166162133216858, | |
| "learning_rate": 1.8355848215653073e-05, | |
| "loss": 0.9522457122802734, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.4553314121037464, | |
| "grad_norm": 0.530035138130188, | |
| "learning_rate": 1.8342729878537903e-05, | |
| "loss": 0.7427791357040405, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.45677233429394815, | |
| "grad_norm": 0.5844604969024658, | |
| "learning_rate": 1.8329564141579924e-05, | |
| "loss": 0.8231528997421265, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.45821325648414984, | |
| "grad_norm": 0.5728287100791931, | |
| "learning_rate": 1.831635107958125e-05, | |
| "loss": 0.7926725149154663, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.4596541786743516, | |
| "grad_norm": 0.5957269668579102, | |
| "learning_rate": 1.8303090767612882e-05, | |
| "loss": 0.7666646242141724, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.4610951008645533, | |
| "grad_norm": 0.5679943561553955, | |
| "learning_rate": 1.828978328101428e-05, | |
| "loss": 0.8417126536369324, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.46253602305475505, | |
| "grad_norm": 0.5592843890190125, | |
| "learning_rate": 1.8276428695392908e-05, | |
| "loss": 0.8209915161132812, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.46397694524495675, | |
| "grad_norm": 0.5582208633422852, | |
| "learning_rate": 1.8263027086623852e-05, | |
| "loss": 0.8837690353393555, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.4654178674351585, | |
| "grad_norm": 0.5516442656517029, | |
| "learning_rate": 1.824957853084935e-05, | |
| "loss": 0.8529192209243774, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.4668587896253602, | |
| "grad_norm": 0.46436768770217896, | |
| "learning_rate": 1.8236083104478373e-05, | |
| "loss": 0.7889063358306885, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.46829971181556196, | |
| "grad_norm": 0.5388792157173157, | |
| "learning_rate": 1.82225408841862e-05, | |
| "loss": 0.9407418966293335, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.4697406340057637, | |
| "grad_norm": 0.5538932681083679, | |
| "learning_rate": 1.8208951946913965e-05, | |
| "loss": 0.7872345447540283, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.4711815561959654, | |
| "grad_norm": 0.5267892479896545, | |
| "learning_rate": 1.819531636986823e-05, | |
| "loss": 0.882459282875061, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.47262247838616717, | |
| "grad_norm": 0.5909714698791504, | |
| "learning_rate": 1.8181634230520537e-05, | |
| "loss": 0.9235331416130066, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.47406340057636887, | |
| "grad_norm": 0.46869757771492004, | |
| "learning_rate": 1.8167905606606995e-05, | |
| "loss": 0.729993462562561, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.4755043227665706, | |
| "grad_norm": 0.5067007541656494, | |
| "learning_rate": 1.8154130576127794e-05, | |
| "loss": 0.782564103603363, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.4769452449567723, | |
| "grad_norm": 0.5801006555557251, | |
| "learning_rate": 1.8140309217346805e-05, | |
| "loss": 0.8379372358322144, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.4783861671469741, | |
| "grad_norm": 0.5479756593704224, | |
| "learning_rate": 1.812644160879111e-05, | |
| "loss": 0.8435878753662109, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.47982708933717577, | |
| "grad_norm": 0.5957703590393066, | |
| "learning_rate": 1.8112527829250558e-05, | |
| "loss": 0.9512555599212646, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.4812680115273775, | |
| "grad_norm": 0.6068620681762695, | |
| "learning_rate": 1.809856795777733e-05, | |
| "loss": 0.9145616292953491, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.4827089337175792, | |
| "grad_norm": 0.6074503064155579, | |
| "learning_rate": 1.8084562073685482e-05, | |
| "loss": 0.882449746131897, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.484149855907781, | |
| "grad_norm": 0.525236964225769, | |
| "learning_rate": 1.807051025655048e-05, | |
| "loss": 0.8563383221626282, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.48559077809798273, | |
| "grad_norm": 0.5497779250144958, | |
| "learning_rate": 1.8056412586208784e-05, | |
| "loss": 0.9395558834075928, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.48703170028818443, | |
| "grad_norm": 0.6293545365333557, | |
| "learning_rate": 1.8042269142757354e-05, | |
| "loss": 0.88382488489151, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.4884726224783862, | |
| "grad_norm": 0.577172577381134, | |
| "learning_rate": 1.8028080006553223e-05, | |
| "loss": 0.8618476390838623, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.4899135446685879, | |
| "grad_norm": 0.6339530944824219, | |
| "learning_rate": 1.8013845258213024e-05, | |
| "loss": 0.9440295696258545, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.49135446685878964, | |
| "grad_norm": 0.5603951215744019, | |
| "learning_rate": 1.7999564978612544e-05, | |
| "loss": 0.8309040069580078, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.49279538904899134, | |
| "grad_norm": 0.5788914561271667, | |
| "learning_rate": 1.7985239248886264e-05, | |
| "loss": 0.7537168264389038, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.4942363112391931, | |
| "grad_norm": 0.6304644346237183, | |
| "learning_rate": 1.797086815042688e-05, | |
| "loss": 0.8292367458343506, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.4956772334293948, | |
| "grad_norm": 0.5928136110305786, | |
| "learning_rate": 1.7956451764884862e-05, | |
| "loss": 0.9440184831619263, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.49711815561959655, | |
| "grad_norm": 0.5665022730827332, | |
| "learning_rate": 1.7941990174167987e-05, | |
| "loss": 0.8995509147644043, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.49855907780979825, | |
| "grad_norm": 0.5720584392547607, | |
| "learning_rate": 1.7927483460440857e-05, | |
| "loss": 0.76401686668396, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.5267989635467529, | |
| "learning_rate": 1.7912931706124447e-05, | |
| "loss": 0.8446654081344604, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.5014409221902018, | |
| "grad_norm": 0.5750987529754639, | |
| "learning_rate": 1.789833499389564e-05, | |
| "loss": 0.9858725070953369, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.5028818443804035, | |
| "grad_norm": 0.5349816083908081, | |
| "learning_rate": 1.7883693406686746e-05, | |
| "loss": 0.7775611877441406, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.5043227665706052, | |
| "grad_norm": 0.5641323328018188, | |
| "learning_rate": 1.786900702768504e-05, | |
| "loss": 0.8651770353317261, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5057636887608069, | |
| "grad_norm": 0.5117978453636169, | |
| "learning_rate": 1.7854275940332272e-05, | |
| "loss": 0.9010483026504517, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.5072046109510087, | |
| "grad_norm": 0.5440452694892883, | |
| "learning_rate": 1.7839500228324223e-05, | |
| "loss": 0.8568730354309082, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.5086455331412104, | |
| "grad_norm": 0.6057641506195068, | |
| "learning_rate": 1.78246799756102e-05, | |
| "loss": 0.8103794455528259, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.5100864553314121, | |
| "grad_norm": 0.5351923108100891, | |
| "learning_rate": 1.7809815266392575e-05, | |
| "loss": 0.7672939300537109, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.5115273775216138, | |
| "grad_norm": 0.5460227131843567, | |
| "learning_rate": 1.779490618512631e-05, | |
| "loss": 0.8088076114654541, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.5129682997118156, | |
| "grad_norm": 0.5851891040802002, | |
| "learning_rate": 1.7779952816518454e-05, | |
| "loss": 0.9721853137016296, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.5144092219020173, | |
| "grad_norm": 0.5677433013916016, | |
| "learning_rate": 1.7764955245527693e-05, | |
| "loss": 0.8301690220832825, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.515850144092219, | |
| "grad_norm": 0.535126805305481, | |
| "learning_rate": 1.7749913557363844e-05, | |
| "loss": 0.7846548557281494, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.5172910662824207, | |
| "grad_norm": 0.525834321975708, | |
| "learning_rate": 1.7734827837487386e-05, | |
| "loss": 0.7961332201957703, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.5187319884726225, | |
| "grad_norm": 0.5872762203216553, | |
| "learning_rate": 1.771969817160896e-05, | |
| "loss": 0.9165979623794556, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5201729106628242, | |
| "grad_norm": 0.6101512312889099, | |
| "learning_rate": 1.770452464568889e-05, | |
| "loss": 0.8378180861473083, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.521613832853026, | |
| "grad_norm": 0.5622274279594421, | |
| "learning_rate": 1.7689307345936705e-05, | |
| "loss": 0.9264764189720154, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.5230547550432276, | |
| "grad_norm": 0.5881224274635315, | |
| "learning_rate": 1.767404635881062e-05, | |
| "loss": 0.7161828279495239, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.5244956772334294, | |
| "grad_norm": 0.5270618796348572, | |
| "learning_rate": 1.7658741771017076e-05, | |
| "loss": 0.7974847555160522, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.5259365994236311, | |
| "grad_norm": 0.6095194816589355, | |
| "learning_rate": 1.7643393669510236e-05, | |
| "loss": 0.9246715307235718, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.5273775216138329, | |
| "grad_norm": 0.5328574776649475, | |
| "learning_rate": 1.7628002141491477e-05, | |
| "loss": 0.7695842981338501, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.5288184438040345, | |
| "grad_norm": 0.5745126605033875, | |
| "learning_rate": 1.7612567274408925e-05, | |
| "loss": 0.8186914920806885, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.5302593659942363, | |
| "grad_norm": 0.628709077835083, | |
| "learning_rate": 1.759708915595692e-05, | |
| "loss": 0.8628635406494141, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.531700288184438, | |
| "grad_norm": 0.6294564604759216, | |
| "learning_rate": 1.7581567874075552e-05, | |
| "loss": 0.9215906858444214, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.5331412103746398, | |
| "grad_norm": 0.5869788527488708, | |
| "learning_rate": 1.7566003516950146e-05, | |
| "loss": 0.8090559244155884, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.5345821325648416, | |
| "grad_norm": 0.5539984703063965, | |
| "learning_rate": 1.755039617301075e-05, | |
| "loss": 0.7528271675109863, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.5360230547550432, | |
| "grad_norm": 0.6224076747894287, | |
| "learning_rate": 1.753474593093167e-05, | |
| "loss": 0.8555803298950195, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.537463976945245, | |
| "grad_norm": 0.5650821328163147, | |
| "learning_rate": 1.751905287963091e-05, | |
| "loss": 0.7390874624252319, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.5389048991354467, | |
| "grad_norm": 0.574177086353302, | |
| "learning_rate": 1.7503317108269722e-05, | |
| "loss": 0.8000231385231018, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.5403458213256485, | |
| "grad_norm": 0.5812594294548035, | |
| "learning_rate": 1.7487538706252062e-05, | |
| "loss": 0.7520013451576233, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.5417867435158501, | |
| "grad_norm": 0.6823700666427612, | |
| "learning_rate": 1.7471717763224096e-05, | |
| "loss": 0.8118777275085449, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.5432276657060519, | |
| "grad_norm": 0.5933303236961365, | |
| "learning_rate": 1.7455854369073703e-05, | |
| "loss": 0.9474261999130249, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.5446685878962536, | |
| "grad_norm": 0.5944895148277283, | |
| "learning_rate": 1.7439948613929928e-05, | |
| "loss": 0.8606828451156616, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.5461095100864554, | |
| "grad_norm": 0.6672566533088684, | |
| "learning_rate": 1.742400058816252e-05, | |
| "loss": 0.9909709692001343, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.547550432276657, | |
| "grad_norm": 0.6100744009017944, | |
| "learning_rate": 1.740801038238137e-05, | |
| "loss": 1.0025837421417236, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5489913544668588, | |
| "grad_norm": 0.5349178910255432, | |
| "learning_rate": 1.7391978087436032e-05, | |
| "loss": 0.7103726863861084, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.5504322766570605, | |
| "grad_norm": 0.7046380639076233, | |
| "learning_rate": 1.737590379441518e-05, | |
| "loss": 0.8792040348052979, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.5518731988472623, | |
| "grad_norm": 0.4961056411266327, | |
| "learning_rate": 1.735978759464612e-05, | |
| "loss": 0.6803684234619141, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.553314121037464, | |
| "grad_norm": 0.679291307926178, | |
| "learning_rate": 1.734362957969423e-05, | |
| "loss": 0.9841272830963135, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.5547550432276657, | |
| "grad_norm": 0.6313779950141907, | |
| "learning_rate": 1.7327429841362494e-05, | |
| "loss": 0.9495967030525208, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.5561959654178674, | |
| "grad_norm": 0.6582711935043335, | |
| "learning_rate": 1.7311188471690925e-05, | |
| "loss": 0.883407711982727, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.5576368876080692, | |
| "grad_norm": 0.5695992112159729, | |
| "learning_rate": 1.729490556295608e-05, | |
| "loss": 0.779451847076416, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.5590778097982709, | |
| "grad_norm": 0.5270829796791077, | |
| "learning_rate": 1.7278581207670522e-05, | |
| "loss": 0.8198345899581909, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.5605187319884726, | |
| "grad_norm": 0.5536486506462097, | |
| "learning_rate": 1.72622154985823e-05, | |
| "loss": 0.8746044635772705, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.5619596541786743, | |
| "grad_norm": 0.570270836353302, | |
| "learning_rate": 1.7245808528674403e-05, | |
| "loss": 0.7180839776992798, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5634005763688761, | |
| "grad_norm": 0.5193256139755249, | |
| "learning_rate": 1.7229360391164256e-05, | |
| "loss": 0.7979388236999512, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.5648414985590778, | |
| "grad_norm": 0.6080824136734009, | |
| "learning_rate": 1.7212871179503188e-05, | |
| "loss": 0.8225536346435547, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.5662824207492796, | |
| "grad_norm": 0.5873963832855225, | |
| "learning_rate": 1.719634098737588e-05, | |
| "loss": 0.9406713247299194, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.5677233429394812, | |
| "grad_norm": 0.5575190782546997, | |
| "learning_rate": 1.7179769908699856e-05, | |
| "loss": 0.8183209896087646, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.569164265129683, | |
| "grad_norm": 0.7602899074554443, | |
| "learning_rate": 1.716315803762494e-05, | |
| "loss": 0.7549249529838562, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.5706051873198847, | |
| "grad_norm": 0.6657350659370422, | |
| "learning_rate": 1.7146505468532707e-05, | |
| "loss": 0.8715250492095947, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.5720461095100865, | |
| "grad_norm": 0.5282604694366455, | |
| "learning_rate": 1.7129812296035985e-05, | |
| "loss": 0.6946278810501099, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.5734870317002881, | |
| "grad_norm": 0.7253147959709167, | |
| "learning_rate": 1.711307861497827e-05, | |
| "loss": 0.8823874592781067, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.5749279538904899, | |
| "grad_norm": 0.5167055130004883, | |
| "learning_rate": 1.709630452043323e-05, | |
| "loss": 0.7666712999343872, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.5763688760806917, | |
| "grad_norm": 0.6416431069374084, | |
| "learning_rate": 1.707949010770413e-05, | |
| "loss": 0.9795528054237366, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5778097982708934, | |
| "grad_norm": 0.640731930732727, | |
| "learning_rate": 1.7062635472323306e-05, | |
| "loss": 0.9208186864852905, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.579250720461095, | |
| "grad_norm": 0.549220621585846, | |
| "learning_rate": 1.7045740710051637e-05, | |
| "loss": 0.6796606779098511, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.5806916426512968, | |
| "grad_norm": 0.6743412613868713, | |
| "learning_rate": 1.7028805916877975e-05, | |
| "loss": 0.9954819679260254, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.5821325648414986, | |
| "grad_norm": 0.609154462814331, | |
| "learning_rate": 1.7011831189018607e-05, | |
| "loss": 0.8255324363708496, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.5835734870317003, | |
| "grad_norm": 0.5475680828094482, | |
| "learning_rate": 1.6994816622916726e-05, | |
| "loss": 0.8262126445770264, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.5850144092219021, | |
| "grad_norm": 0.5728087425231934, | |
| "learning_rate": 1.697776231524185e-05, | |
| "loss": 0.847625195980072, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.5864553314121037, | |
| "grad_norm": 0.5608763098716736, | |
| "learning_rate": 1.696066836288931e-05, | |
| "loss": 0.8561680316925049, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.5878962536023055, | |
| "grad_norm": 0.6582566499710083, | |
| "learning_rate": 1.694353486297966e-05, | |
| "loss": 0.9651436805725098, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.5893371757925072, | |
| "grad_norm": 0.6420081853866577, | |
| "learning_rate": 1.6926361912858172e-05, | |
| "loss": 0.8645302057266235, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.590778097982709, | |
| "grad_norm": 0.5092571377754211, | |
| "learning_rate": 1.6909149610094245e-05, | |
| "loss": 0.6726502180099487, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.5922190201729106, | |
| "grad_norm": 0.6565274596214294, | |
| "learning_rate": 1.689189805248085e-05, | |
| "loss": 0.8908772468566895, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.5936599423631124, | |
| "grad_norm": 0.5423445105552673, | |
| "learning_rate": 1.6874607338034015e-05, | |
| "loss": 0.815947413444519, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.5951008645533141, | |
| "grad_norm": 0.5798755288124084, | |
| "learning_rate": 1.6857277564992212e-05, | |
| "loss": 0.7958804965019226, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.5965417867435159, | |
| "grad_norm": 0.5115224123001099, | |
| "learning_rate": 1.683990883181585e-05, | |
| "loss": 0.7472406029701233, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.5979827089337176, | |
| "grad_norm": 0.48664143681526184, | |
| "learning_rate": 1.6822501237186677e-05, | |
| "loss": 0.9092934727668762, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.5994236311239193, | |
| "grad_norm": 0.5489537715911865, | |
| "learning_rate": 1.680505488000725e-05, | |
| "loss": 0.8212312459945679, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.600864553314121, | |
| "grad_norm": 0.643065869808197, | |
| "learning_rate": 1.678756985940034e-05, | |
| "loss": 1.0320334434509277, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.6023054755043228, | |
| "grad_norm": 0.5214179754257202, | |
| "learning_rate": 1.6770046274708404e-05, | |
| "loss": 0.8109217882156372, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.6037463976945245, | |
| "grad_norm": 0.5978414416313171, | |
| "learning_rate": 1.6752484225493e-05, | |
| "loss": 0.80739426612854, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.6051873198847262, | |
| "grad_norm": 0.5854954719543457, | |
| "learning_rate": 1.673488381153421e-05, | |
| "loss": 0.8603742122650146, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6066282420749279, | |
| "grad_norm": 0.5928551554679871, | |
| "learning_rate": 1.6717245132830114e-05, | |
| "loss": 0.781597375869751, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.6080691642651297, | |
| "grad_norm": 0.5761047601699829, | |
| "learning_rate": 1.6699568289596175e-05, | |
| "loss": 0.8539882898330688, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.6095100864553314, | |
| "grad_norm": 0.5701265931129456, | |
| "learning_rate": 1.66818533822647e-05, | |
| "loss": 0.7630763053894043, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.6109510086455331, | |
| "grad_norm": 0.5455740094184875, | |
| "learning_rate": 1.6664100511484252e-05, | |
| "loss": 0.8451640009880066, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.6123919308357348, | |
| "grad_norm": 0.6149104833602905, | |
| "learning_rate": 1.66463097781191e-05, | |
| "loss": 0.8973286151885986, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.6138328530259366, | |
| "grad_norm": 0.6265655159950256, | |
| "learning_rate": 1.662848128324862e-05, | |
| "loss": 0.8606114983558655, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.6152737752161384, | |
| "grad_norm": 0.5285404324531555, | |
| "learning_rate": 1.6610615128166738e-05, | |
| "loss": 0.7635661363601685, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.6167146974063401, | |
| "grad_norm": 0.522072434425354, | |
| "learning_rate": 1.659271141438135e-05, | |
| "loss": 0.8387913703918457, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.6181556195965417, | |
| "grad_norm": 0.5824821591377258, | |
| "learning_rate": 1.657477024361374e-05, | |
| "loss": 0.8419888019561768, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.6195965417867435, | |
| "grad_norm": 0.5804380774497986, | |
| "learning_rate": 1.6556791717798013e-05, | |
| "loss": 0.8360769748687744, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.6210374639769453, | |
| "grad_norm": 0.6119903922080994, | |
| "learning_rate": 1.65387759390805e-05, | |
| "loss": 0.8125861883163452, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.622478386167147, | |
| "grad_norm": 0.5484216809272766, | |
| "learning_rate": 1.65207230098192e-05, | |
| "loss": 0.7709370851516724, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.6239193083573487, | |
| "grad_norm": 0.5545355081558228, | |
| "learning_rate": 1.6502633032583173e-05, | |
| "loss": 0.663250207901001, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.6253602305475504, | |
| "grad_norm": 0.5580651760101318, | |
| "learning_rate": 1.6484506110151977e-05, | |
| "loss": 0.7123851776123047, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.6268011527377522, | |
| "grad_norm": 0.6118587851524353, | |
| "learning_rate": 1.646634234551508e-05, | |
| "loss": 0.7323435544967651, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.6282420749279539, | |
| "grad_norm": 0.5335654020309448, | |
| "learning_rate": 1.6448141841871262e-05, | |
| "loss": 0.704369306564331, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.6296829971181557, | |
| "grad_norm": 0.5516027212142944, | |
| "learning_rate": 1.6429904702628044e-05, | |
| "loss": 0.7410569190979004, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.6311239193083573, | |
| "grad_norm": 0.6382977366447449, | |
| "learning_rate": 1.64116310314011e-05, | |
| "loss": 0.7958225011825562, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.6325648414985591, | |
| "grad_norm": 0.7621927261352539, | |
| "learning_rate": 1.639332093201365e-05, | |
| "loss": 0.9671751856803894, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.6340057636887608, | |
| "grad_norm": 0.5713940858840942, | |
| "learning_rate": 1.6374974508495895e-05, | |
| "loss": 0.8552824258804321, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.6354466858789626, | |
| "grad_norm": 0.6350247859954834, | |
| "learning_rate": 1.6356591865084413e-05, | |
| "loss": 0.8765571117401123, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.6368876080691642, | |
| "grad_norm": 0.5537572503089905, | |
| "learning_rate": 1.633817310622156e-05, | |
| "loss": 0.7851294279098511, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.638328530259366, | |
| "grad_norm": 0.5975667834281921, | |
| "learning_rate": 1.631971833655489e-05, | |
| "loss": 0.8070282936096191, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.6397694524495677, | |
| "grad_norm": 0.5319865345954895, | |
| "learning_rate": 1.630122766093656e-05, | |
| "loss": 0.8131473660469055, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.6412103746397695, | |
| "grad_norm": 0.635881245136261, | |
| "learning_rate": 1.6282701184422717e-05, | |
| "loss": 0.8592699766159058, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.6426512968299711, | |
| "grad_norm": 0.8621135950088501, | |
| "learning_rate": 1.6264139012272927e-05, | |
| "loss": 0.8750573992729187, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.6440922190201729, | |
| "grad_norm": 0.5308735966682434, | |
| "learning_rate": 1.6245541249949558e-05, | |
| "loss": 0.754439651966095, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.6455331412103746, | |
| "grad_norm": 0.583328366279602, | |
| "learning_rate": 1.622690800311718e-05, | |
| "loss": 0.9887423515319824, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.6469740634005764, | |
| "grad_norm": 0.5716376304626465, | |
| "learning_rate": 1.620823937764198e-05, | |
| "loss": 0.6914777755737305, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.6484149855907781, | |
| "grad_norm": 0.510493814945221, | |
| "learning_rate": 1.618953547959115e-05, | |
| "loss": 0.7186870574951172, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.6498559077809798, | |
| "grad_norm": 0.5852826833724976, | |
| "learning_rate": 1.6170796415232278e-05, | |
| "loss": 0.8185505867004395, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.6512968299711815, | |
| "grad_norm": 0.5349909067153931, | |
| "learning_rate": 1.615202229103276e-05, | |
| "loss": 0.8269165754318237, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.6527377521613833, | |
| "grad_norm": 0.5847293734550476, | |
| "learning_rate": 1.613321321365918e-05, | |
| "loss": 0.7098100185394287, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.654178674351585, | |
| "grad_norm": 0.7040385603904724, | |
| "learning_rate": 1.6114369289976727e-05, | |
| "loss": 0.8710377216339111, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.6556195965417867, | |
| "grad_norm": 0.519795298576355, | |
| "learning_rate": 1.609549062704855e-05, | |
| "loss": 0.788497805595398, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.6570605187319885, | |
| "grad_norm": 0.5721155405044556, | |
| "learning_rate": 1.607657733213519e-05, | |
| "loss": 0.8074417114257812, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.6585014409221902, | |
| "grad_norm": 0.5255273580551147, | |
| "learning_rate": 1.6057629512693938e-05, | |
| "loss": 0.8402494788169861, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.659942363112392, | |
| "grad_norm": 0.6352676153182983, | |
| "learning_rate": 1.6038647276378246e-05, | |
| "loss": 0.7989722490310669, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.6613832853025937, | |
| "grad_norm": 0.5911176800727844, | |
| "learning_rate": 1.601963073103711e-05, | |
| "loss": 0.8456016778945923, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.6628242074927954, | |
| "grad_norm": 0.5407702326774597, | |
| "learning_rate": 1.6000579984714453e-05, | |
| "loss": 0.9468159675598145, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.6642651296829971, | |
| "grad_norm": 0.6090099215507507, | |
| "learning_rate": 1.5981495145648507e-05, | |
| "loss": 0.8110009431838989, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.6657060518731989, | |
| "grad_norm": 0.6194032430648804, | |
| "learning_rate": 1.5962376322271218e-05, | |
| "loss": 0.7852393388748169, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.6671469740634006, | |
| "grad_norm": 0.620429515838623, | |
| "learning_rate": 1.5943223623207608e-05, | |
| "loss": 0.6961592435836792, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.6685878962536023, | |
| "grad_norm": 0.524815022945404, | |
| "learning_rate": 1.5924037157275156e-05, | |
| "loss": 0.7707295417785645, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.670028818443804, | |
| "grad_norm": 0.5691516399383545, | |
| "learning_rate": 1.5904817033483216e-05, | |
| "loss": 0.936732292175293, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.6714697406340058, | |
| "grad_norm": 0.5965140461921692, | |
| "learning_rate": 1.588556336103235e-05, | |
| "loss": 0.9365247488021851, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.6729106628242075, | |
| "grad_norm": 0.5429266095161438, | |
| "learning_rate": 1.586627624931373e-05, | |
| "loss": 0.7953989505767822, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.6743515850144092, | |
| "grad_norm": 0.6360976099967957, | |
| "learning_rate": 1.584695580790853e-05, | |
| "loss": 0.8234362602233887, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.6757925072046109, | |
| "grad_norm": 0.5487512350082397, | |
| "learning_rate": 1.5827602146587277e-05, | |
| "loss": 0.8714422583580017, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.6772334293948127, | |
| "grad_norm": 0.5767121315002441, | |
| "learning_rate": 1.5808215375309243e-05, | |
| "loss": 0.7444263696670532, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.6786743515850144, | |
| "grad_norm": 0.5978402495384216, | |
| "learning_rate": 1.578879560422182e-05, | |
| "loss": 0.7328246235847473, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.6801152737752162, | |
| "grad_norm": 0.6980924010276794, | |
| "learning_rate": 1.576934294365988e-05, | |
| "loss": 0.9335816502571106, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.6815561959654178, | |
| "grad_norm": 0.578369677066803, | |
| "learning_rate": 1.574985750414518e-05, | |
| "loss": 0.7987563610076904, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.6829971181556196, | |
| "grad_norm": 0.5886070728302002, | |
| "learning_rate": 1.5730339396385684e-05, | |
| "loss": 0.8044711351394653, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.6844380403458213, | |
| "grad_norm": 0.5407376885414124, | |
| "learning_rate": 1.571078873127499e-05, | |
| "loss": 0.7568783760070801, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.6858789625360231, | |
| "grad_norm": 0.5557326674461365, | |
| "learning_rate": 1.569120561989166e-05, | |
| "loss": 0.9148538112640381, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.6873198847262247, | |
| "grad_norm": 0.6909852027893066, | |
| "learning_rate": 1.5671590173498602e-05, | |
| "loss": 0.8718932867050171, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.6887608069164265, | |
| "grad_norm": 0.5995265245437622, | |
| "learning_rate": 1.5651942503542435e-05, | |
| "loss": 0.815102219581604, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.6902017291066282, | |
| "grad_norm": 0.6188293099403381, | |
| "learning_rate": 1.563226272165287e-05, | |
| "loss": 0.8449897766113281, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.69164265129683, | |
| "grad_norm": 0.5536366701126099, | |
| "learning_rate": 1.561255093964205e-05, | |
| "loss": 0.6867048740386963, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.6930835734870316, | |
| "grad_norm": 0.5375257134437561, | |
| "learning_rate": 1.559280726950395e-05, | |
| "loss": 0.7475601434707642, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.6945244956772334, | |
| "grad_norm": 0.5647540092468262, | |
| "learning_rate": 1.557303182341369e-05, | |
| "loss": 0.787548303604126, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.6959654178674352, | |
| "grad_norm": 0.6174569129943848, | |
| "learning_rate": 1.5553224713726954e-05, | |
| "loss": 0.806119441986084, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.6974063400576369, | |
| "grad_norm": 0.5920992493629456, | |
| "learning_rate": 1.553338605297931e-05, | |
| "loss": 0.7667924761772156, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.6988472622478387, | |
| "grad_norm": 0.5590972900390625, | |
| "learning_rate": 1.55135159538856e-05, | |
| "loss": 0.757071852684021, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.7002881844380403, | |
| "grad_norm": 0.7274153232574463, | |
| "learning_rate": 1.549361452933926e-05, | |
| "loss": 0.8193639516830444, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.7017291066282421, | |
| "grad_norm": 0.6040515899658203, | |
| "learning_rate": 1.5473681892411733e-05, | |
| "loss": 0.7873412370681763, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.7031700288184438, | |
| "grad_norm": 0.5784795880317688, | |
| "learning_rate": 1.5453718156351775e-05, | |
| "loss": 0.8369981050491333, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.7046109510086456, | |
| "grad_norm": 0.6267423629760742, | |
| "learning_rate": 1.543372343458485e-05, | |
| "loss": 0.9070534110069275, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.7060518731988472, | |
| "grad_norm": 0.6215807199478149, | |
| "learning_rate": 1.541369784071246e-05, | |
| "loss": 0.8574085235595703, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.707492795389049, | |
| "grad_norm": 0.6305344104766846, | |
| "learning_rate": 1.5393641488511514e-05, | |
| "loss": 0.8450196981430054, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.7089337175792507, | |
| "grad_norm": 0.5847838521003723, | |
| "learning_rate": 1.537355449193367e-05, | |
| "loss": 0.7976102828979492, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.7103746397694525, | |
| "grad_norm": 0.5669839382171631, | |
| "learning_rate": 1.5353436965104708e-05, | |
| "loss": 0.9691898822784424, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.7118155619596542, | |
| "grad_norm": 0.5651892423629761, | |
| "learning_rate": 1.533328902232385e-05, | |
| "loss": 0.8189429044723511, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.7132564841498559, | |
| "grad_norm": 0.5902916193008423, | |
| "learning_rate": 1.5313110778063142e-05, | |
| "loss": 0.9311359524726868, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.7146974063400576, | |
| "grad_norm": 0.5827770233154297, | |
| "learning_rate": 1.5292902346966782e-05, | |
| "loss": 0.8031175136566162, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.7161383285302594, | |
| "grad_norm": 0.5901687741279602, | |
| "learning_rate": 1.5272663843850484e-05, | |
| "loss": 0.7599796056747437, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.7175792507204611, | |
| "grad_norm": 0.5782361626625061, | |
| "learning_rate": 1.5252395383700815e-05, | |
| "loss": 0.6886292099952698, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.7190201729106628, | |
| "grad_norm": 0.6033108830451965, | |
| "learning_rate": 1.5232097081674542e-05, | |
| "loss": 0.9031864404678345, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.7204610951008645, | |
| "grad_norm": 0.5347855091094971, | |
| "learning_rate": 1.5211769053097984e-05, | |
| "loss": 0.8068576455116272, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7219020172910663, | |
| "grad_norm": 0.580989420413971, | |
| "learning_rate": 1.5191411413466357e-05, | |
| "loss": 0.7699841260910034, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.723342939481268, | |
| "grad_norm": 0.5658376216888428, | |
| "learning_rate": 1.5171024278443108e-05, | |
| "loss": 0.8388891220092773, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.7247838616714697, | |
| "grad_norm": 0.5904268622398376, | |
| "learning_rate": 1.5150607763859266e-05, | |
| "loss": 0.9313668012619019, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.7262247838616714, | |
| "grad_norm": 0.5787760019302368, | |
| "learning_rate": 1.5130161985712786e-05, | |
| "loss": 0.7764126062393188, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.7276657060518732, | |
| "grad_norm": 0.6377682089805603, | |
| "learning_rate": 1.510968706016788e-05, | |
| "loss": 0.9689415693283081, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.729106628242075, | |
| "grad_norm": 0.5599706172943115, | |
| "learning_rate": 1.5089183103554372e-05, | |
| "loss": 0.8220781087875366, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.7305475504322767, | |
| "grad_norm": 0.48352760076522827, | |
| "learning_rate": 1.506865023236702e-05, | |
| "loss": 0.7756180763244629, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.7319884726224783, | |
| "grad_norm": 0.5906649827957153, | |
| "learning_rate": 1.504808856326486e-05, | |
| "loss": 0.8613312840461731, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.7334293948126801, | |
| "grad_norm": 0.7078525424003601, | |
| "learning_rate": 1.5027498213070558e-05, | |
| "loss": 0.9191365838050842, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.7348703170028819, | |
| "grad_norm": 0.5789520144462585, | |
| "learning_rate": 1.5006879298769721e-05, | |
| "loss": 0.7827133536338806, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.7363112391930836, | |
| "grad_norm": 0.6121973395347595, | |
| "learning_rate": 1.498623193751025e-05, | |
| "loss": 0.8028290271759033, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.7377521613832853, | |
| "grad_norm": 0.6222305297851562, | |
| "learning_rate": 1.4965556246601677e-05, | |
| "loss": 1.0173910856246948, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.739193083573487, | |
| "grad_norm": 0.6844464540481567, | |
| "learning_rate": 1.4944852343514478e-05, | |
| "loss": 0.9411607980728149, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.7406340057636888, | |
| "grad_norm": 0.5612237453460693, | |
| "learning_rate": 1.4924120345879422e-05, | |
| "loss": 0.7112851738929749, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.7420749279538905, | |
| "grad_norm": 0.602611780166626, | |
| "learning_rate": 1.490336037148691e-05, | |
| "loss": 0.811668872833252, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.7435158501440923, | |
| "grad_norm": 0.5829697251319885, | |
| "learning_rate": 1.4882572538286279e-05, | |
| "loss": 0.8444674015045166, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.7449567723342939, | |
| "grad_norm": 0.6352137923240662, | |
| "learning_rate": 1.486175696438516e-05, | |
| "loss": 0.8086113333702087, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.7463976945244957, | |
| "grad_norm": 0.5597201585769653, | |
| "learning_rate": 1.4840913768048788e-05, | |
| "loss": 0.7531715631484985, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.7478386167146974, | |
| "grad_norm": 0.6141948103904724, | |
| "learning_rate": 1.4820043067699342e-05, | |
| "loss": 0.8946848511695862, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.7492795389048992, | |
| "grad_norm": 0.6349887251853943, | |
| "learning_rate": 1.479914498191526e-05, | |
| "loss": 0.9019135236740112, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.7507204610951008, | |
| "grad_norm": 0.5442377924919128, | |
| "learning_rate": 1.4778219629430585e-05, | |
| "loss": 0.8436559438705444, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.7521613832853026, | |
| "grad_norm": 0.5552181601524353, | |
| "learning_rate": 1.4757267129134266e-05, | |
| "loss": 0.7609561681747437, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.7536023054755043, | |
| "grad_norm": 0.5454914569854736, | |
| "learning_rate": 1.4736287600069493e-05, | |
| "loss": 0.6811736822128296, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.7550432276657061, | |
| "grad_norm": 0.5283980965614319, | |
| "learning_rate": 1.4715281161433032e-05, | |
| "loss": 0.6988868713378906, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.7564841498559077, | |
| "grad_norm": 0.6911367177963257, | |
| "learning_rate": 1.4694247932574533e-05, | |
| "loss": 0.7650970220565796, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.7579250720461095, | |
| "grad_norm": 0.6973710060119629, | |
| "learning_rate": 1.4673188032995858e-05, | |
| "loss": 0.8932456970214844, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.7593659942363112, | |
| "grad_norm": 0.573573887348175, | |
| "learning_rate": 1.4652101582350394e-05, | |
| "loss": 0.8257400989532471, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.760806916426513, | |
| "grad_norm": 0.6292358040809631, | |
| "learning_rate": 1.4630988700442386e-05, | |
| "loss": 0.7898586988449097, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.7622478386167147, | |
| "grad_norm": 0.6209089159965515, | |
| "learning_rate": 1.4609849507226254e-05, | |
| "loss": 0.8471835851669312, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.7636887608069164, | |
| "grad_norm": 0.5322726368904114, | |
| "learning_rate": 1.4588684122805895e-05, | |
| "loss": 0.6654898524284363, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.7651296829971181, | |
| "grad_norm": 0.600330114364624, | |
| "learning_rate": 1.4567492667434031e-05, | |
| "loss": 0.6953059434890747, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.7665706051873199, | |
| "grad_norm": 0.6372314691543579, | |
| "learning_rate": 1.4546275261511493e-05, | |
| "loss": 0.8754534721374512, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.7680115273775217, | |
| "grad_norm": 0.5352574586868286, | |
| "learning_rate": 1.4525032025586555e-05, | |
| "loss": 0.646438717842102, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.7694524495677233, | |
| "grad_norm": 0.5772659182548523, | |
| "learning_rate": 1.450376308035425e-05, | |
| "loss": 0.7625031471252441, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.770893371757925, | |
| "grad_norm": 0.6228421926498413, | |
| "learning_rate": 1.4482468546655679e-05, | |
| "loss": 0.8432234525680542, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.7723342939481268, | |
| "grad_norm": 0.5398774147033691, | |
| "learning_rate": 1.4461148545477328e-05, | |
| "loss": 0.8652327060699463, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.7737752161383286, | |
| "grad_norm": 0.6024429798126221, | |
| "learning_rate": 1.443980319795037e-05, | |
| "loss": 0.853449821472168, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.7752161383285303, | |
| "grad_norm": 0.6293278336524963, | |
| "learning_rate": 1.4418432625349997e-05, | |
| "loss": 1.0636059045791626, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.776657060518732, | |
| "grad_norm": 0.6416622996330261, | |
| "learning_rate": 1.439703694909471e-05, | |
| "loss": 0.7662414312362671, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.7780979827089337, | |
| "grad_norm": 0.5270242094993591, | |
| "learning_rate": 1.437561629074564e-05, | |
| "loss": 0.7614691257476807, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.7795389048991355, | |
| "grad_norm": 0.6011450290679932, | |
| "learning_rate": 1.4354170772005862e-05, | |
| "loss": 0.8249303698539734, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.7809798270893372, | |
| "grad_norm": 0.570554256439209, | |
| "learning_rate": 1.4332700514719687e-05, | |
| "loss": 0.8420515060424805, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.7824207492795389, | |
| "grad_norm": 0.6834957003593445, | |
| "learning_rate": 1.4311205640871985e-05, | |
| "loss": 0.7347505688667297, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.7838616714697406, | |
| "grad_norm": 0.5610590577125549, | |
| "learning_rate": 1.4289686272587493e-05, | |
| "loss": 0.7387393712997437, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.7853025936599424, | |
| "grad_norm": 0.5757878422737122, | |
| "learning_rate": 1.4268142532130102e-05, | |
| "loss": 0.8649142980575562, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.7867435158501441, | |
| "grad_norm": 0.5952373147010803, | |
| "learning_rate": 1.4246574541902187e-05, | |
| "loss": 0.9021813273429871, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.7881844380403458, | |
| "grad_norm": 0.564714789390564, | |
| "learning_rate": 1.4224982424443894e-05, | |
| "loss": 0.8700401186943054, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.7896253602305475, | |
| "grad_norm": 0.5540585517883301, | |
| "learning_rate": 1.4203366302432447e-05, | |
| "loss": 0.904576301574707, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.7910662824207493, | |
| "grad_norm": 0.5750954151153564, | |
| "learning_rate": 1.4181726298681462e-05, | |
| "loss": 0.6933422088623047, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.792507204610951, | |
| "grad_norm": 0.6007941365242004, | |
| "learning_rate": 1.4160062536140235e-05, | |
| "loss": 0.7416195869445801, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.7939481268011528, | |
| "grad_norm": 0.6115585565567017, | |
| "learning_rate": 1.413837513789305e-05, | |
| "loss": 0.7834339141845703, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.7953890489913544, | |
| "grad_norm": 0.5994195342063904, | |
| "learning_rate": 1.4116664227158481e-05, | |
| "loss": 0.7905891537666321, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.7968299711815562, | |
| "grad_norm": 0.6097184419631958, | |
| "learning_rate": 1.4094929927288688e-05, | |
| "loss": 0.9327152967453003, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.7982708933717579, | |
| "grad_norm": 0.6687369346618652, | |
| "learning_rate": 1.4073172361768715e-05, | |
| "loss": 1.0150576829910278, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.7997118155619597, | |
| "grad_norm": 0.5856130719184875, | |
| "learning_rate": 1.4051391654215803e-05, | |
| "loss": 0.7339403629302979, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.8011527377521613, | |
| "grad_norm": 0.5845277309417725, | |
| "learning_rate": 1.402958792837866e-05, | |
| "loss": 0.8820422887802124, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.8025936599423631, | |
| "grad_norm": 0.5744795203208923, | |
| "learning_rate": 1.4007761308136791e-05, | |
| "loss": 0.7946295738220215, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.8040345821325648, | |
| "grad_norm": 0.5813792943954468, | |
| "learning_rate": 1.3985911917499764e-05, | |
| "loss": 0.8150988221168518, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.8054755043227666, | |
| "grad_norm": 0.5602363348007202, | |
| "learning_rate": 1.3964039880606522e-05, | |
| "loss": 0.7896089553833008, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.8069164265129684, | |
| "grad_norm": 0.5744377374649048, | |
| "learning_rate": 1.3942145321724678e-05, | |
| "loss": 0.6854937076568604, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.80835734870317, | |
| "grad_norm": 0.5629292130470276, | |
| "learning_rate": 1.3920228365249807e-05, | |
| "loss": 0.8506255149841309, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.8097982708933718, | |
| "grad_norm": 0.6157159209251404, | |
| "learning_rate": 1.3898289135704726e-05, | |
| "loss": 0.8277969360351562, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.8112391930835735, | |
| "grad_norm": 0.6000183820724487, | |
| "learning_rate": 1.387632775773881e-05, | |
| "loss": 0.7690261006355286, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.8126801152737753, | |
| "grad_norm": 0.5285099744796753, | |
| "learning_rate": 1.3854344356127272e-05, | |
| "loss": 0.8356052041053772, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.8141210374639769, | |
| "grad_norm": 0.5782726407051086, | |
| "learning_rate": 1.3832339055770443e-05, | |
| "loss": 0.7185315489768982, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.8155619596541787, | |
| "grad_norm": 0.6152368187904358, | |
| "learning_rate": 1.3810311981693084e-05, | |
| "loss": 0.8090786337852478, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.8170028818443804, | |
| "grad_norm": 0.6380811929702759, | |
| "learning_rate": 1.378826325904366e-05, | |
| "loss": 0.786938488483429, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.8184438040345822, | |
| "grad_norm": 0.5521567463874817, | |
| "learning_rate": 1.3766193013093637e-05, | |
| "loss": 0.754514217376709, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.8198847262247838, | |
| "grad_norm": 0.5184625387191772, | |
| "learning_rate": 1.3744101369236765e-05, | |
| "loss": 0.8483080863952637, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.8213256484149856, | |
| "grad_norm": 0.5939867496490479, | |
| "learning_rate": 1.3721988452988366e-05, | |
| "loss": 0.7971341609954834, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.8227665706051873, | |
| "grad_norm": 0.6107808947563171, | |
| "learning_rate": 1.3699854389984626e-05, | |
| "loss": 0.7329815626144409, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.8242074927953891, | |
| "grad_norm": 0.5994635820388794, | |
| "learning_rate": 1.367769930598188e-05, | |
| "loss": 0.6906665563583374, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.8256484149855908, | |
| "grad_norm": 0.7389527559280396, | |
| "learning_rate": 1.3655523326855889e-05, | |
| "loss": 0.9652154445648193, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.8270893371757925, | |
| "grad_norm": 0.6304334402084351, | |
| "learning_rate": 1.3633326578601133e-05, | |
| "loss": 0.7543058395385742, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.8285302593659942, | |
| "grad_norm": 0.6436780095100403, | |
| "learning_rate": 1.36111091873301e-05, | |
| "loss": 0.7568035125732422, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.829971181556196, | |
| "grad_norm": 0.6232560873031616, | |
| "learning_rate": 1.3588871279272553e-05, | |
| "loss": 0.821622371673584, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.8314121037463977, | |
| "grad_norm": 0.585903525352478, | |
| "learning_rate": 1.356661298077483e-05, | |
| "loss": 0.6720898151397705, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.8328530259365994, | |
| "grad_norm": 0.6891927719116211, | |
| "learning_rate": 1.3544334418299115e-05, | |
| "loss": 0.8245535492897034, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.8342939481268011, | |
| "grad_norm": 0.6329434514045715, | |
| "learning_rate": 1.3522035718422722e-05, | |
| "loss": 0.7789890766143799, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.8357348703170029, | |
| "grad_norm": 0.5948352813720703, | |
| "learning_rate": 1.3499717007837381e-05, | |
| "loss": 0.7675924301147461, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.8371757925072046, | |
| "grad_norm": 0.699979841709137, | |
| "learning_rate": 1.3477378413348516e-05, | |
| "loss": 1.040189504623413, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.8386167146974063, | |
| "grad_norm": 0.5986906290054321, | |
| "learning_rate": 1.3455020061874517e-05, | |
| "loss": 0.7689225077629089, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.840057636887608, | |
| "grad_norm": 0.5747967958450317, | |
| "learning_rate": 1.343264208044603e-05, | |
| "loss": 0.8100346326828003, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.8414985590778098, | |
| "grad_norm": 0.5550184845924377, | |
| "learning_rate": 1.3410244596205222e-05, | |
| "loss": 0.6612875461578369, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.8429394812680115, | |
| "grad_norm": 0.5810987949371338, | |
| "learning_rate": 1.3387827736405079e-05, | |
| "loss": 0.8044896721839905, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.8443804034582133, | |
| "grad_norm": 0.5891236066818237, | |
| "learning_rate": 1.336539162840866e-05, | |
| "loss": 0.7642381191253662, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.845821325648415, | |
| "grad_norm": 0.5809030532836914, | |
| "learning_rate": 1.3342936399688387e-05, | |
| "loss": 0.9018023014068604, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.8472622478386167, | |
| "grad_norm": 0.5283681750297546, | |
| "learning_rate": 1.3320462177825321e-05, | |
| "loss": 0.6488313674926758, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.8487031700288185, | |
| "grad_norm": 0.691789448261261, | |
| "learning_rate": 1.3297969090508434e-05, | |
| "loss": 0.9001842737197876, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.8501440922190202, | |
| "grad_norm": 0.5393000245094299, | |
| "learning_rate": 1.3275457265533876e-05, | |
| "loss": 0.7306933403015137, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.8515850144092219, | |
| "grad_norm": 0.5468747615814209, | |
| "learning_rate": 1.3252926830804264e-05, | |
| "loss": 0.7733805179595947, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.8530259365994236, | |
| "grad_norm": 0.6384521722793579, | |
| "learning_rate": 1.323037791432795e-05, | |
| "loss": 0.8655095100402832, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.8544668587896254, | |
| "grad_norm": 0.6655124425888062, | |
| "learning_rate": 1.3207810644218277e-05, | |
| "loss": 0.8915953040122986, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.8559077809798271, | |
| "grad_norm": 0.8409086465835571, | |
| "learning_rate": 1.3185225148692884e-05, | |
| "loss": 0.7861893773078918, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.8573487031700289, | |
| "grad_norm": 0.529315173625946, | |
| "learning_rate": 1.316262155607295e-05, | |
| "loss": 0.6976417303085327, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.8587896253602305, | |
| "grad_norm": 0.7067042589187622, | |
| "learning_rate": 1.3139999994782468e-05, | |
| "loss": 0.7654718160629272, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.8602305475504323, | |
| "grad_norm": 0.5580388903617859, | |
| "learning_rate": 1.3117360593347535e-05, | |
| "loss": 0.7656145095825195, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.861671469740634, | |
| "grad_norm": 0.5642116665840149, | |
| "learning_rate": 1.3094703480395597e-05, | |
| "loss": 0.8965026140213013, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.8631123919308358, | |
| "grad_norm": 0.5922830700874329, | |
| "learning_rate": 1.3072028784654732e-05, | |
| "loss": 0.7152601480484009, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.8645533141210374, | |
| "grad_norm": 0.6355752944946289, | |
| "learning_rate": 1.3049336634952918e-05, | |
| "loss": 0.8053656816482544, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.8659942363112392, | |
| "grad_norm": 0.5964752435684204, | |
| "learning_rate": 1.3026627160217302e-05, | |
| "loss": 0.8105225563049316, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.8674351585014409, | |
| "grad_norm": 0.5729430913925171, | |
| "learning_rate": 1.3003900489473455e-05, | |
| "loss": 0.7246596217155457, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.8688760806916427, | |
| "grad_norm": 0.5772424340248108, | |
| "learning_rate": 1.2981156751844659e-05, | |
| "loss": 0.878759503364563, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.8703170028818443, | |
| "grad_norm": 0.6068733334541321, | |
| "learning_rate": 1.2958396076551157e-05, | |
| "loss": 0.771674633026123, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.8717579250720461, | |
| "grad_norm": 0.7268223762512207, | |
| "learning_rate": 1.2935618592909419e-05, | |
| "loss": 0.8257685899734497, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.8731988472622478, | |
| "grad_norm": 0.5236421227455139, | |
| "learning_rate": 1.2912824430331425e-05, | |
| "loss": 0.7433541417121887, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.8746397694524496, | |
| "grad_norm": 0.6350269913673401, | |
| "learning_rate": 1.2890013718323913e-05, | |
| "loss": 0.7988302111625671, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.8760806916426513, | |
| "grad_norm": 0.5974761843681335, | |
| "learning_rate": 1.2867186586487642e-05, | |
| "loss": 0.69598388671875, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.877521613832853, | |
| "grad_norm": 0.6555905938148499, | |
| "learning_rate": 1.2844343164516671e-05, | |
| "loss": 0.8490002155303955, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.8789625360230547, | |
| "grad_norm": 0.5933155417442322, | |
| "learning_rate": 1.2821483582197604e-05, | |
| "loss": 0.8980555534362793, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.8804034582132565, | |
| "grad_norm": 0.5820237994194031, | |
| "learning_rate": 1.2798607969408865e-05, | |
| "loss": 0.6644209623336792, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.8818443804034583, | |
| "grad_norm": 0.7071488499641418, | |
| "learning_rate": 1.2775716456119962e-05, | |
| "loss": 0.8648644685745239, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.8832853025936599, | |
| "grad_norm": 0.6479011178016663, | |
| "learning_rate": 1.275280917239073e-05, | |
| "loss": 0.7517762184143066, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.8847262247838616, | |
| "grad_norm": 0.5966110825538635, | |
| "learning_rate": 1.2729886248370616e-05, | |
| "loss": 0.8572617173194885, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.8861671469740634, | |
| "grad_norm": 0.644463062286377, | |
| "learning_rate": 1.2706947814297923e-05, | |
| "loss": 0.9164141416549683, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.8876080691642652, | |
| "grad_norm": 0.5812889933586121, | |
| "learning_rate": 1.2683994000499078e-05, | |
| "loss": 0.8233826160430908, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.8890489913544669, | |
| "grad_norm": 0.5846764445304871, | |
| "learning_rate": 1.2661024937387888e-05, | |
| "loss": 0.7792487144470215, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.8904899135446686, | |
| "grad_norm": 0.5830758810043335, | |
| "learning_rate": 1.2638040755464802e-05, | |
| "loss": 0.8183486461639404, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.8919308357348703, | |
| "grad_norm": 0.600700855255127, | |
| "learning_rate": 1.2615041585316163e-05, | |
| "loss": 0.8133193254470825, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.8933717579250721, | |
| "grad_norm": 0.7301390767097473, | |
| "learning_rate": 1.2592027557613476e-05, | |
| "loss": 0.9990606307983398, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.8948126801152738, | |
| "grad_norm": 0.6379325985908508, | |
| "learning_rate": 1.2568998803112658e-05, | |
| "loss": 0.9970605373382568, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.8962536023054755, | |
| "grad_norm": 0.6173595190048218, | |
| "learning_rate": 1.2545955452653294e-05, | |
| "loss": 0.7785443067550659, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.8976945244956772, | |
| "grad_norm": 0.5701093673706055, | |
| "learning_rate": 1.2522897637157905e-05, | |
| "loss": 0.7518781423568726, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.899135446685879, | |
| "grad_norm": 0.5610460638999939, | |
| "learning_rate": 1.249982548763119e-05, | |
| "loss": 0.7533529996871948, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.9005763688760807, | |
| "grad_norm": 0.5958570837974548, | |
| "learning_rate": 1.2476739135159286e-05, | |
| "loss": 0.7188445329666138, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.9020172910662824, | |
| "grad_norm": 0.6020827889442444, | |
| "learning_rate": 1.2453638710909033e-05, | |
| "loss": 0.821806788444519, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.9034582132564841, | |
| "grad_norm": 0.6721755266189575, | |
| "learning_rate": 1.2430524346127215e-05, | |
| "loss": 0.829516589641571, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.9048991354466859, | |
| "grad_norm": 0.648987889289856, | |
| "learning_rate": 1.2407396172139822e-05, | |
| "loss": 0.8926085233688354, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.9063400576368876, | |
| "grad_norm": 0.6156898140907288, | |
| "learning_rate": 1.2384254320351301e-05, | |
| "loss": 0.6972285509109497, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.9077809798270894, | |
| "grad_norm": 0.5631294846534729, | |
| "learning_rate": 1.2361098922243812e-05, | |
| "loss": 0.8096356391906738, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.909221902017291, | |
| "grad_norm": 0.5868187546730042, | |
| "learning_rate": 1.233793010937648e-05, | |
| "loss": 0.8927261233329773, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.9106628242074928, | |
| "grad_norm": 0.7092884182929993, | |
| "learning_rate": 1.2314748013384639e-05, | |
| "loss": 0.8122037649154663, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.9121037463976945, | |
| "grad_norm": 0.5598758459091187, | |
| "learning_rate": 1.2291552765979104e-05, | |
| "loss": 0.7103347182273865, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.9135446685878963, | |
| "grad_norm": 0.5712397694587708, | |
| "learning_rate": 1.2268344498945404e-05, | |
| "loss": 0.8768525123596191, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.9149855907780979, | |
| "grad_norm": 0.6459716558456421, | |
| "learning_rate": 1.2245123344143044e-05, | |
| "loss": 0.8588672876358032, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.9164265129682997, | |
| "grad_norm": 0.6647771000862122, | |
| "learning_rate": 1.2221889433504743e-05, | |
| "loss": 0.838790237903595, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.9178674351585014, | |
| "grad_norm": 0.630342960357666, | |
| "learning_rate": 1.2198642899035704e-05, | |
| "loss": 0.8317841291427612, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.9193083573487032, | |
| "grad_norm": 0.6161531209945679, | |
| "learning_rate": 1.2175383872812851e-05, | |
| "loss": 0.8737014532089233, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.920749279538905, | |
| "grad_norm": 0.5237288475036621, | |
| "learning_rate": 1.2152112486984071e-05, | |
| "loss": 0.7703773975372314, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.9221902017291066, | |
| "grad_norm": 0.6804758310317993, | |
| "learning_rate": 1.2128828873767487e-05, | |
| "loss": 0.8832876682281494, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.9236311239193083, | |
| "grad_norm": 0.6326414942741394, | |
| "learning_rate": 1.210553316545068e-05, | |
| "loss": 0.8082267045974731, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.9250720461095101, | |
| "grad_norm": 0.518068253993988, | |
| "learning_rate": 1.2082225494389961e-05, | |
| "loss": 0.8200917840003967, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.9265129682997119, | |
| "grad_norm": 0.6239233016967773, | |
| "learning_rate": 1.2058905993009604e-05, | |
| "loss": 0.7693580389022827, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.9279538904899135, | |
| "grad_norm": 0.7250560522079468, | |
| "learning_rate": 1.2035574793801095e-05, | |
| "loss": 0.9098770618438721, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.9293948126801153, | |
| "grad_norm": 0.5638466477394104, | |
| "learning_rate": 1.2012232029322384e-05, | |
| "loss": 0.8906189799308777, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.930835734870317, | |
| "grad_norm": 0.5933250188827515, | |
| "learning_rate": 1.1988877832197135e-05, | |
| "loss": 0.8694485425949097, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.9322766570605188, | |
| "grad_norm": 0.5953240394592285, | |
| "learning_rate": 1.1965512335113958e-05, | |
| "loss": 0.6657785177230835, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.9337175792507204, | |
| "grad_norm": 0.7607654929161072, | |
| "learning_rate": 1.1942135670825672e-05, | |
| "loss": 0.8424980044364929, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.9351585014409222, | |
| "grad_norm": 0.5857916474342346, | |
| "learning_rate": 1.1918747972148541e-05, | |
| "loss": 0.8536664247512817, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.9365994236311239, | |
| "grad_norm": 0.6392703056335449, | |
| "learning_rate": 1.189534937196152e-05, | |
| "loss": 0.7639260292053223, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.9380403458213257, | |
| "grad_norm": 0.6499249339103699, | |
| "learning_rate": 1.1871940003205505e-05, | |
| "loss": 0.6486794948577881, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.9394812680115274, | |
| "grad_norm": 0.5829477906227112, | |
| "learning_rate": 1.1848519998882572e-05, | |
| "loss": 0.8788424730300903, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.9409221902017291, | |
| "grad_norm": 0.6531436443328857, | |
| "learning_rate": 1.1825089492055227e-05, | |
| "loss": 0.7516967058181763, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.9423631123919308, | |
| "grad_norm": 0.5762933492660522, | |
| "learning_rate": 1.1801648615845638e-05, | |
| "loss": 0.8271253108978271, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.9438040345821326, | |
| "grad_norm": 0.5682176351547241, | |
| "learning_rate": 1.17781975034349e-05, | |
| "loss": 0.7439221143722534, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.9452449567723343, | |
| "grad_norm": 0.6221916675567627, | |
| "learning_rate": 1.1754736288062256e-05, | |
| "loss": 0.8153043985366821, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.946685878962536, | |
| "grad_norm": 0.6802467107772827, | |
| "learning_rate": 1.1731265103024351e-05, | |
| "loss": 0.7968660593032837, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.9481268011527377, | |
| "grad_norm": 0.5722571611404419, | |
| "learning_rate": 1.1707784081674482e-05, | |
| "loss": 0.7626321315765381, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.9495677233429395, | |
| "grad_norm": 0.6222584247589111, | |
| "learning_rate": 1.1684293357421824e-05, | |
| "loss": 0.7135765552520752, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.9510086455331412, | |
| "grad_norm": 0.6431753039360046, | |
| "learning_rate": 1.1660793063730674e-05, | |
| "loss": 0.7373322248458862, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.952449567723343, | |
| "grad_norm": 0.6022802591323853, | |
| "learning_rate": 1.1637283334119713e-05, | |
| "loss": 0.7500289678573608, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.9538904899135446, | |
| "grad_norm": 0.5709948539733887, | |
| "learning_rate": 1.1613764302161222e-05, | |
| "loss": 0.8752883076667786, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.9553314121037464, | |
| "grad_norm": 0.5400457382202148, | |
| "learning_rate": 1.1590236101480339e-05, | |
| "loss": 0.670220673084259, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.9567723342939481, | |
| "grad_norm": 0.647760272026062, | |
| "learning_rate": 1.1566698865754291e-05, | |
| "loss": 0.8324419260025024, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.9582132564841499, | |
| "grad_norm": 0.6560764312744141, | |
| "learning_rate": 1.154315272871164e-05, | |
| "loss": 0.7774407863616943, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.9596541786743515, | |
| "grad_norm": 0.6726030111312866, | |
| "learning_rate": 1.1519597824131527e-05, | |
| "loss": 0.8403815031051636, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.9610951008645533, | |
| "grad_norm": 0.7448883056640625, | |
| "learning_rate": 1.1496034285842897e-05, | |
| "loss": 0.8905906677246094, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.962536023054755, | |
| "grad_norm": 0.6700533032417297, | |
| "learning_rate": 1.1472462247723752e-05, | |
| "loss": 0.7805229425430298, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.9639769452449568, | |
| "grad_norm": 0.6197190284729004, | |
| "learning_rate": 1.1448881843700392e-05, | |
| "loss": 0.7229803800582886, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.9654178674351584, | |
| "grad_norm": 0.59717857837677, | |
| "learning_rate": 1.1425293207746638e-05, | |
| "loss": 0.8611892461776733, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.9668587896253602, | |
| "grad_norm": 0.796004593372345, | |
| "learning_rate": 1.1401696473883086e-05, | |
| "loss": 0.8944194316864014, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.968299711815562, | |
| "grad_norm": 0.5947389602661133, | |
| "learning_rate": 1.1378091776176348e-05, | |
| "loss": 0.8134667873382568, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.9697406340057637, | |
| "grad_norm": 0.6667426824569702, | |
| "learning_rate": 1.1354479248738271e-05, | |
| "loss": 0.7905430197715759, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.9711815561959655, | |
| "grad_norm": 0.5860411524772644, | |
| "learning_rate": 1.1330859025725193e-05, | |
| "loss": 0.7300920486450195, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.9726224783861671, | |
| "grad_norm": 0.5794700384140015, | |
| "learning_rate": 1.130723124133718e-05, | |
| "loss": 0.7038317918777466, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.9740634005763689, | |
| "grad_norm": 0.6058333516120911, | |
| "learning_rate": 1.1283596029817248e-05, | |
| "loss": 0.8271040916442871, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.9755043227665706, | |
| "grad_norm": 0.6150603294372559, | |
| "learning_rate": 1.1259953525450616e-05, | |
| "loss": 0.7750831842422485, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.9769452449567724, | |
| "grad_norm": 0.6314917802810669, | |
| "learning_rate": 1.1236303862563945e-05, | |
| "loss": 0.9058209657669067, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.978386167146974, | |
| "grad_norm": 0.6402139067649841, | |
| "learning_rate": 1.1212647175524551e-05, | |
| "loss": 0.8667633533477783, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.9798270893371758, | |
| "grad_norm": 0.5912607312202454, | |
| "learning_rate": 1.1188983598739675e-05, | |
| "loss": 0.7763844728469849, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.9812680115273775, | |
| "grad_norm": 0.6257530450820923, | |
| "learning_rate": 1.1165313266655698e-05, | |
| "loss": 0.8233456611633301, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.9827089337175793, | |
| "grad_norm": 0.6173887252807617, | |
| "learning_rate": 1.1141636313757369e-05, | |
| "loss": 0.7909761667251587, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.984149855907781, | |
| "grad_norm": 0.6075451374053955, | |
| "learning_rate": 1.1117952874567073e-05, | |
| "loss": 0.8156836628913879, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.9855907780979827, | |
| "grad_norm": 0.6228588819503784, | |
| "learning_rate": 1.1094263083644036e-05, | |
| "loss": 0.7954513430595398, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.9870317002881844, | |
| "grad_norm": 0.5579982399940491, | |
| "learning_rate": 1.1070567075583572e-05, | |
| "loss": 0.781398594379425, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.9884726224783862, | |
| "grad_norm": 0.5715999603271484, | |
| "learning_rate": 1.1046864985016326e-05, | |
| "loss": 0.7486555576324463, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.9899135446685879, | |
| "grad_norm": 0.7021177411079407, | |
| "learning_rate": 1.1023156946607485e-05, | |
| "loss": 0.866464376449585, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.9913544668587896, | |
| "grad_norm": 0.7282413840293884, | |
| "learning_rate": 1.0999443095056051e-05, | |
| "loss": 1.0165081024169922, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.9927953890489913, | |
| "grad_norm": 0.6795728802680969, | |
| "learning_rate": 1.0975723565094036e-05, | |
| "loss": 0.659792959690094, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.9942363112391931, | |
| "grad_norm": 0.5526747107505798, | |
| "learning_rate": 1.0951998491485722e-05, | |
| "loss": 0.6671736836433411, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.9956772334293948, | |
| "grad_norm": 0.6293612122535706, | |
| "learning_rate": 1.0928268009026885e-05, | |
| "loss": 0.7951771020889282, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.9971181556195965, | |
| "grad_norm": 0.5912222266197205, | |
| "learning_rate": 1.090453225254404e-05, | |
| "loss": 0.764556884765625, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.9985590778097982, | |
| "grad_norm": 0.6562872529029846, | |
| "learning_rate": 1.0880791356893652e-05, | |
| "loss": 0.7850635647773743, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.6331807971000671, | |
| "learning_rate": 1.0857045456961394e-05, | |
| "loss": 0.8403358459472656, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.0014409221902016, | |
| "grad_norm": 0.53999924659729, | |
| "learning_rate": 1.0833294687661376e-05, | |
| "loss": 0.7220283150672913, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.0028818443804035, | |
| "grad_norm": 0.5253156423568726, | |
| "learning_rate": 1.0809539183935358e-05, | |
| "loss": 0.6956285238265991, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.0043227665706052, | |
| "grad_norm": 0.5885496139526367, | |
| "learning_rate": 1.0785779080752012e-05, | |
| "loss": 0.7220051288604736, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.005763688760807, | |
| "grad_norm": 0.5988635420799255, | |
| "learning_rate": 1.0762014513106143e-05, | |
| "loss": 0.6951655745506287, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.0072046109510087, | |
| "grad_norm": 0.49586206674575806, | |
| "learning_rate": 1.073824561601791e-05, | |
| "loss": 0.6196475625038147, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.0086455331412103, | |
| "grad_norm": 0.7789514064788818, | |
| "learning_rate": 1.0714472524532085e-05, | |
| "loss": 0.8181064128875732, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.0100864553314122, | |
| "grad_norm": 0.5491945147514343, | |
| "learning_rate": 1.0690695373717254e-05, | |
| "loss": 0.6493509411811829, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.0115273775216138, | |
| "grad_norm": 0.7047191262245178, | |
| "learning_rate": 1.0666914298665079e-05, | |
| "loss": 0.7577710151672363, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.0129682997118155, | |
| "grad_norm": 0.7199708223342896, | |
| "learning_rate": 1.0643129434489514e-05, | |
| "loss": 0.7761712074279785, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.0144092219020173, | |
| "grad_norm": 0.5329734086990356, | |
| "learning_rate": 1.0619340916326039e-05, | |
| "loss": 0.6484905481338501, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.015850144092219, | |
| "grad_norm": 0.6241481304168701, | |
| "learning_rate": 1.05955488793309e-05, | |
| "loss": 0.669824481010437, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.0172910662824208, | |
| "grad_norm": 0.5850144028663635, | |
| "learning_rate": 1.0571753458680329e-05, | |
| "loss": 0.7012614011764526, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.0187319884726225, | |
| "grad_norm": 0.5404962301254272, | |
| "learning_rate": 1.0547954789569785e-05, | |
| "loss": 0.7472232580184937, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.0201729106628241, | |
| "grad_norm": 0.7023850679397583, | |
| "learning_rate": 1.0524153007213185e-05, | |
| "loss": 0.8266449570655823, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.021613832853026, | |
| "grad_norm": 0.6288999319076538, | |
| "learning_rate": 1.0500348246842136e-05, | |
| "loss": 0.8678215742111206, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.0230547550432276, | |
| "grad_norm": 0.7006089091300964, | |
| "learning_rate": 1.0476540643705153e-05, | |
| "loss": 0.7670779228210449, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.0244956772334295, | |
| "grad_norm": 0.7132288217544556, | |
| "learning_rate": 1.0452730333066919e-05, | |
| "loss": 0.8972759246826172, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.0259365994236311, | |
| "grad_norm": 0.7113478779792786, | |
| "learning_rate": 1.0428917450207489e-05, | |
| "loss": 0.6248783469200134, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.0273775216138328, | |
| "grad_norm": 0.5936718583106995, | |
| "learning_rate": 1.0405102130421536e-05, | |
| "loss": 0.7173407077789307, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.0288184438040346, | |
| "grad_norm": 0.6577355265617371, | |
| "learning_rate": 1.0381284509017578e-05, | |
| "loss": 0.8124462366104126, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.0302593659942363, | |
| "grad_norm": 0.5350339412689209, | |
| "learning_rate": 1.035746472131721e-05, | |
| "loss": 0.6757724285125732, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.031700288184438, | |
| "grad_norm": 0.625091016292572, | |
| "learning_rate": 1.033364290265433e-05, | |
| "loss": 0.6315501928329468, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.0331412103746398, | |
| "grad_norm": 0.7091370820999146, | |
| "learning_rate": 1.0309819188374386e-05, | |
| "loss": 0.6656221151351929, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.0345821325648414, | |
| "grad_norm": 0.6561499238014221, | |
| "learning_rate": 1.0285993713833586e-05, | |
| "loss": 0.7100173234939575, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.0360230547550433, | |
| "grad_norm": 0.637753963470459, | |
| "learning_rate": 1.0262166614398144e-05, | |
| "loss": 0.6528655290603638, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.037463976945245, | |
| "grad_norm": 0.6694713234901428, | |
| "learning_rate": 1.0238338025443507e-05, | |
| "loss": 0.8758798837661743, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.0389048991354466, | |
| "grad_norm": 0.6426526308059692, | |
| "learning_rate": 1.0214508082353578e-05, | |
| "loss": 0.6384798288345337, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.0403458213256485, | |
| "grad_norm": 0.6031687259674072, | |
| "learning_rate": 1.019067692051996e-05, | |
| "loss": 0.7043944597244263, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.04178674351585, | |
| "grad_norm": 0.6691206693649292, | |
| "learning_rate": 1.016684467534118e-05, | |
| "loss": 0.7097047567367554, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.043227665706052, | |
| "grad_norm": 0.646927535533905, | |
| "learning_rate": 1.0143011482221916e-05, | |
| "loss": 0.6287018060684204, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.0446685878962536, | |
| "grad_norm": 0.6911733150482178, | |
| "learning_rate": 1.0119177476572237e-05, | |
| "loss": 0.645012617111206, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.0461095100864553, | |
| "grad_norm": 0.567036509513855, | |
| "learning_rate": 1.0095342793806828e-05, | |
| "loss": 0.6956562995910645, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.0475504322766571, | |
| "grad_norm": 0.679738461971283, | |
| "learning_rate": 1.0071507569344213e-05, | |
| "loss": 0.6614462733268738, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.0489913544668588, | |
| "grad_norm": 0.7040454149246216, | |
| "learning_rate": 1.0047671938606002e-05, | |
| "loss": 0.7205630540847778, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.0504322766570606, | |
| "grad_norm": 0.8227221369743347, | |
| "learning_rate": 1.0023836037016115e-05, | |
| "loss": 0.684057354927063, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.0518731988472623, | |
| "grad_norm": 0.6533084511756897, | |
| "learning_rate": 1e-05, | |
| "loss": 0.5894599556922913, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.053314121037464, | |
| "grad_norm": 0.6469436287879944, | |
| "learning_rate": 9.976163962983889e-06, | |
| "loss": 0.6846705675125122, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.0547550432276658, | |
| "grad_norm": 0.6716520190238953, | |
| "learning_rate": 9.952328061394001e-06, | |
| "loss": 0.6836794018745422, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.0561959654178674, | |
| "grad_norm": 0.6842796802520752, | |
| "learning_rate": 9.92849243065579e-06, | |
| "loss": 0.7929626107215881, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.057636887608069, | |
| "grad_norm": 0.8187405467033386, | |
| "learning_rate": 9.904657206193175e-06, | |
| "loss": 0.6658978462219238, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.059077809798271, | |
| "grad_norm": 0.6604411602020264, | |
| "learning_rate": 9.880822523427766e-06, | |
| "loss": 0.6329740285873413, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.0605187319884726, | |
| "grad_norm": 0.6714515089988708, | |
| "learning_rate": 9.856988517778086e-06, | |
| "loss": 0.7321251630783081, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.0619596541786744, | |
| "grad_norm": 0.7017390727996826, | |
| "learning_rate": 9.833155324658823e-06, | |
| "loss": 0.744154691696167, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.063400576368876, | |
| "grad_norm": 0.7215039134025574, | |
| "learning_rate": 9.809323079480043e-06, | |
| "loss": 0.7160513401031494, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.0648414985590777, | |
| "grad_norm": 0.5923436284065247, | |
| "learning_rate": 9.785491917646425e-06, | |
| "loss": 0.6206352710723877, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.0662824207492796, | |
| "grad_norm": 0.6986830830574036, | |
| "learning_rate": 9.761661974556495e-06, | |
| "loss": 0.7324307560920715, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.0677233429394812, | |
| "grad_norm": 0.5980766415596008, | |
| "learning_rate": 9.737833385601858e-06, | |
| "loss": 0.6454845666885376, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.069164265129683, | |
| "grad_norm": 0.9852787852287292, | |
| "learning_rate": 9.714006286166416e-06, | |
| "loss": 0.6832539439201355, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.0706051873198847, | |
| "grad_norm": 0.9407469034194946, | |
| "learning_rate": 9.690180811625618e-06, | |
| "loss": 0.8757802248001099, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.0720461095100864, | |
| "grad_norm": 0.612558126449585, | |
| "learning_rate": 9.666357097345672e-06, | |
| "loss": 0.8261449337005615, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.0734870317002883, | |
| "grad_norm": 0.5846421122550964, | |
| "learning_rate": 9.642535278682795e-06, | |
| "loss": 0.6925964951515198, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.07492795389049, | |
| "grad_norm": 0.7762342691421509, | |
| "learning_rate": 9.618715490982425e-06, | |
| "loss": 0.7280269861221313, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.0763688760806915, | |
| "grad_norm": 0.689094066619873, | |
| "learning_rate": 9.594897869578466e-06, | |
| "loss": 0.8407827615737915, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.0778097982708934, | |
| "grad_norm": 0.6733883619308472, | |
| "learning_rate": 9.571082549792513e-06, | |
| "loss": 0.7604755163192749, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.079250720461095, | |
| "grad_norm": 0.608144223690033, | |
| "learning_rate": 9.547269666933085e-06, | |
| "loss": 0.646695613861084, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.080691642651297, | |
| "grad_norm": 0.7533197999000549, | |
| "learning_rate": 9.523459356294849e-06, | |
| "loss": 0.7508174180984497, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.0821325648414986, | |
| "grad_norm": 0.7083786129951477, | |
| "learning_rate": 9.499651753157869e-06, | |
| "loss": 0.7907015085220337, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.0835734870317002, | |
| "grad_norm": 0.7183002233505249, | |
| "learning_rate": 9.475846992786817e-06, | |
| "loss": 0.8525063991546631, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.085014409221902, | |
| "grad_norm": 0.6429514288902283, | |
| "learning_rate": 9.452045210430218e-06, | |
| "loss": 0.6777982711791992, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.0864553314121037, | |
| "grad_norm": 0.6770809292793274, | |
| "learning_rate": 9.428246541319673e-06, | |
| "loss": 0.7967904806137085, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.0878962536023056, | |
| "grad_norm": 0.6980239152908325, | |
| "learning_rate": 9.404451120669102e-06, | |
| "loss": 0.6735846400260925, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.0893371757925072, | |
| "grad_norm": 0.699763834476471, | |
| "learning_rate": 9.380659083673963e-06, | |
| "loss": 0.7672224044799805, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.0907780979827089, | |
| "grad_norm": 0.6815734505653381, | |
| "learning_rate": 9.356870565510488e-06, | |
| "loss": 0.665432333946228, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.0922190201729107, | |
| "grad_norm": 0.6672005653381348, | |
| "learning_rate": 9.333085701334925e-06, | |
| "loss": 0.6282204389572144, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.0936599423631124, | |
| "grad_norm": 0.8683066368103027, | |
| "learning_rate": 9.30930462628275e-06, | |
| "loss": 0.800000011920929, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.0951008645533142, | |
| "grad_norm": 0.6048387289047241, | |
| "learning_rate": 9.285527475467918e-06, | |
| "loss": 0.530065655708313, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.0965417867435159, | |
| "grad_norm": 0.7173153758049011, | |
| "learning_rate": 9.261754383982093e-06, | |
| "loss": 0.8034170866012573, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.0979827089337175, | |
| "grad_norm": 0.5964096784591675, | |
| "learning_rate": 9.23798548689386e-06, | |
| "loss": 0.6461498141288757, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.0994236311239194, | |
| "grad_norm": 0.6942315697669983, | |
| "learning_rate": 9.21422091924799e-06, | |
| "loss": 0.6815940737724304, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.100864553314121, | |
| "grad_norm": 0.7249640226364136, | |
| "learning_rate": 9.190460816064649e-06, | |
| "loss": 0.7779183387756348, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.1023054755043227, | |
| "grad_norm": 0.7075570821762085, | |
| "learning_rate": 9.16670531233863e-06, | |
| "loss": 0.8409022688865662, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.1037463976945245, | |
| "grad_norm": 0.6892013549804688, | |
| "learning_rate": 9.14295454303861e-06, | |
| "loss": 0.7865520715713501, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.1051873198847262, | |
| "grad_norm": 0.6577411890029907, | |
| "learning_rate": 9.119208643106353e-06, | |
| "loss": 0.8110712766647339, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.106628242074928, | |
| "grad_norm": 0.6635581254959106, | |
| "learning_rate": 9.095467747455965e-06, | |
| "loss": 0.63739013671875, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.1080691642651297, | |
| "grad_norm": 0.6412572264671326, | |
| "learning_rate": 9.071731990973118e-06, | |
| "loss": 0.6418280601501465, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.1095100864553313, | |
| "grad_norm": 0.7638756036758423, | |
| "learning_rate": 9.048001508514283e-06, | |
| "loss": 0.7094175815582275, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.1109510086455332, | |
| "grad_norm": 0.5744118094444275, | |
| "learning_rate": 9.024276434905969e-06, | |
| "loss": 0.6916787624359131, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.1123919308357348, | |
| "grad_norm": 0.793855607509613, | |
| "learning_rate": 9.000556904943956e-06, | |
| "loss": 0.7775775790214539, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.1138328530259365, | |
| "grad_norm": 0.6273725032806396, | |
| "learning_rate": 8.976843053392518e-06, | |
| "loss": 0.6217244267463684, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.1152737752161384, | |
| "grad_norm": 0.6361021399497986, | |
| "learning_rate": 8.95313501498368e-06, | |
| "loss": 0.659927487373352, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.11671469740634, | |
| "grad_norm": 0.7996638417243958, | |
| "learning_rate": 8.929432924416433e-06, | |
| "loss": 0.7214843034744263, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.1181556195965419, | |
| "grad_norm": 0.6677613854408264, | |
| "learning_rate": 8.905736916355969e-06, | |
| "loss": 0.8332221508026123, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.1195965417867435, | |
| "grad_norm": 0.6600406765937805, | |
| "learning_rate": 8.882047125432929e-06, | |
| "loss": 0.601822555065155, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.1210374639769451, | |
| "grad_norm": 0.6558908820152283, | |
| "learning_rate": 8.858363686242635e-06, | |
| "loss": 0.6736506223678589, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.122478386167147, | |
| "grad_norm": 0.6801712512969971, | |
| "learning_rate": 8.834686733344309e-06, | |
| "loss": 0.8313186168670654, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.1239193083573487, | |
| "grad_norm": 0.6852150559425354, | |
| "learning_rate": 8.811016401260327e-06, | |
| "loss": 0.6597498655319214, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.1253602305475505, | |
| "grad_norm": 0.7473975419998169, | |
| "learning_rate": 8.787352824475454e-06, | |
| "loss": 0.6347630023956299, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.1268011527377522, | |
| "grad_norm": 0.6975881457328796, | |
| "learning_rate": 8.76369613743606e-06, | |
| "loss": 0.7058587074279785, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.1282420749279538, | |
| "grad_norm": 0.7085602879524231, | |
| "learning_rate": 8.740046474549387e-06, | |
| "loss": 0.835166335105896, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.1296829971181557, | |
| "grad_norm": 0.6902181506156921, | |
| "learning_rate": 8.716403970182759e-06, | |
| "loss": 0.7125815153121948, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.1311239193083573, | |
| "grad_norm": 0.7300699353218079, | |
| "learning_rate": 8.692768758662827e-06, | |
| "loss": 0.6701489686965942, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.1325648414985592, | |
| "grad_norm": 0.747675895690918, | |
| "learning_rate": 8.66914097427481e-06, | |
| "loss": 0.6480385661125183, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.1340057636887608, | |
| "grad_norm": 0.7518520355224609, | |
| "learning_rate": 8.645520751261736e-06, | |
| "loss": 0.7200244665145874, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.1354466858789625, | |
| "grad_norm": 0.7091997861862183, | |
| "learning_rate": 8.621908223823657e-06, | |
| "loss": 0.7781722545623779, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.1368876080691643, | |
| "grad_norm": 0.6493226289749146, | |
| "learning_rate": 8.598303526116916e-06, | |
| "loss": 0.659550666809082, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.138328530259366, | |
| "grad_norm": 0.7172505855560303, | |
| "learning_rate": 8.574706792253367e-06, | |
| "loss": 0.7001605033874512, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.1397694524495678, | |
| "grad_norm": 0.7529043555259705, | |
| "learning_rate": 8.551118156299613e-06, | |
| "loss": 0.7431622743606567, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.1412103746397695, | |
| "grad_norm": 0.646467387676239, | |
| "learning_rate": 8.527537752276251e-06, | |
| "loss": 0.687673807144165, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.1426512968299711, | |
| "grad_norm": 0.7232325673103333, | |
| "learning_rate": 8.503965714157108e-06, | |
| "loss": 0.567053496837616, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.144092219020173, | |
| "grad_norm": 0.6364091634750366, | |
| "learning_rate": 8.480402175868477e-06, | |
| "loss": 0.6352185010910034, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.1455331412103746, | |
| "grad_norm": 0.6260091066360474, | |
| "learning_rate": 8.456847271288365e-06, | |
| "loss": 0.6209022998809814, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.1469740634005763, | |
| "grad_norm": 0.780681312084198, | |
| "learning_rate": 8.43330113424571e-06, | |
| "loss": 0.8006823658943176, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.1484149855907781, | |
| "grad_norm": 0.7389974594116211, | |
| "learning_rate": 8.409763898519665e-06, | |
| "loss": 0.7085101008415222, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.1498559077809798, | |
| "grad_norm": 0.6930822134017944, | |
| "learning_rate": 8.38623569783878e-06, | |
| "loss": 0.7008879780769348, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.1512968299711814, | |
| "grad_norm": 0.7401261329650879, | |
| "learning_rate": 8.362716665880289e-06, | |
| "loss": 0.8319974541664124, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.1527377521613833, | |
| "grad_norm": 0.8224151730537415, | |
| "learning_rate": 8.339206936269328e-06, | |
| "loss": 0.8165872097015381, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.154178674351585, | |
| "grad_norm": 0.7225102186203003, | |
| "learning_rate": 8.315706642578178e-06, | |
| "loss": 0.7181413173675537, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.1556195965417868, | |
| "grad_norm": 0.6846278309822083, | |
| "learning_rate": 8.292215918325518e-06, | |
| "loss": 0.5527253746986389, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.1570605187319885, | |
| "grad_norm": 0.7421664595603943, | |
| "learning_rate": 8.268734896975649e-06, | |
| "loss": 0.7071849703788757, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.15850144092219, | |
| "grad_norm": 0.7124081254005432, | |
| "learning_rate": 8.245263711937746e-06, | |
| "loss": 0.7202374339103699, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.159942363112392, | |
| "grad_norm": 0.6946137547492981, | |
| "learning_rate": 8.221802496565102e-06, | |
| "loss": 0.7475452423095703, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.1613832853025936, | |
| "grad_norm": 0.6117473244667053, | |
| "learning_rate": 8.198351384154363e-06, | |
| "loss": 0.6516153216362, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.1628242074927955, | |
| "grad_norm": 0.7568015456199646, | |
| "learning_rate": 8.174910507944775e-06, | |
| "loss": 0.7098596096038818, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.1642651296829971, | |
| "grad_norm": 0.7341271638870239, | |
| "learning_rate": 8.15148000111743e-06, | |
| "loss": 0.7767957448959351, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.1657060518731988, | |
| "grad_norm": 0.6909346580505371, | |
| "learning_rate": 8.128059996794495e-06, | |
| "loss": 0.7601003646850586, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.1671469740634006, | |
| "grad_norm": 0.7617642879486084, | |
| "learning_rate": 8.10465062803848e-06, | |
| "loss": 0.8070919513702393, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.1685878962536023, | |
| "grad_norm": 0.7109162211418152, | |
| "learning_rate": 8.08125202785146e-06, | |
| "loss": 0.7039991617202759, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.1700288184438041, | |
| "grad_norm": 0.611440122127533, | |
| "learning_rate": 8.05786432917433e-06, | |
| "loss": 0.7397060394287109, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.1714697406340058, | |
| "grad_norm": 0.6908255219459534, | |
| "learning_rate": 8.034487664886042e-06, | |
| "loss": 0.6378216743469238, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.1729106628242074, | |
| "grad_norm": 0.7022346258163452, | |
| "learning_rate": 8.011122167802869e-06, | |
| "loss": 0.7086902856826782, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.1743515850144093, | |
| "grad_norm": 0.8581727147102356, | |
| "learning_rate": 7.987767970677618e-06, | |
| "loss": 0.8129750490188599, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.175792507204611, | |
| "grad_norm": 0.7053741216659546, | |
| "learning_rate": 7.964425206198907e-06, | |
| "loss": 0.7385943531990051, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.1772334293948128, | |
| "grad_norm": 0.686326801776886, | |
| "learning_rate": 7.941094006990398e-06, | |
| "loss": 0.7873866558074951, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.1786743515850144, | |
| "grad_norm": 0.695410430431366, | |
| "learning_rate": 7.917774505610039e-06, | |
| "loss": 0.6239868402481079, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.180115273775216, | |
| "grad_norm": 0.7279312014579773, | |
| "learning_rate": 7.89446683454932e-06, | |
| "loss": 0.6867477893829346, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.181556195965418, | |
| "grad_norm": 0.7835471630096436, | |
| "learning_rate": 7.871171126232516e-06, | |
| "loss": 0.8003014326095581, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.1829971181556196, | |
| "grad_norm": 0.6107333898544312, | |
| "learning_rate": 7.84788751301593e-06, | |
| "loss": 0.6153905391693115, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.1844380403458212, | |
| "grad_norm": 0.6579324007034302, | |
| "learning_rate": 7.82461612718715e-06, | |
| "loss": 0.6941539645195007, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.185878962536023, | |
| "grad_norm": 0.7140527963638306, | |
| "learning_rate": 7.801357100964295e-06, | |
| "loss": 0.6446021795272827, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.1873198847262247, | |
| "grad_norm": 0.5526272654533386, | |
| "learning_rate": 7.778110566495256e-06, | |
| "loss": 0.6298232078552246, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.1887608069164266, | |
| "grad_norm": 0.7560884356498718, | |
| "learning_rate": 7.754876655856957e-06, | |
| "loss": 0.6755807399749756, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.1902017291066282, | |
| "grad_norm": 0.7089958190917969, | |
| "learning_rate": 7.731655501054597e-06, | |
| "loss": 0.8171476125717163, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.19164265129683, | |
| "grad_norm": 0.8474247455596924, | |
| "learning_rate": 7.708447234020898e-06, | |
| "loss": 0.6631404161453247, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.1930835734870318, | |
| "grad_norm": 0.696233868598938, | |
| "learning_rate": 7.685251986615363e-06, | |
| "loss": 0.679205060005188, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.1945244956772334, | |
| "grad_norm": 0.6988159418106079, | |
| "learning_rate": 7.662069890623525e-06, | |
| "loss": 0.6870052218437195, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.195965417867435, | |
| "grad_norm": 0.8398119807243347, | |
| "learning_rate": 7.63890107775619e-06, | |
| "loss": 0.6322426795959473, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.197406340057637, | |
| "grad_norm": 0.7013004422187805, | |
| "learning_rate": 7.615745679648702e-06, | |
| "loss": 0.7239193916320801, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.1988472622478386, | |
| "grad_norm": 0.7037452459335327, | |
| "learning_rate": 7.59260382786018e-06, | |
| "loss": 0.738641083240509, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.2002881844380404, | |
| "grad_norm": 0.7014064788818359, | |
| "learning_rate": 7.569475653872787e-06, | |
| "loss": 0.7159215807914734, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.201729106628242, | |
| "grad_norm": 0.7071189880371094, | |
| "learning_rate": 7.546361289090971e-06, | |
| "loss": 0.7976879477500916, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.2031700288184437, | |
| "grad_norm": 1.5894548892974854, | |
| "learning_rate": 7.5232608648407166e-06, | |
| "loss": 0.8454712629318237, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.2046109510086456, | |
| "grad_norm": 0.828970730304718, | |
| "learning_rate": 7.500174512368814e-06, | |
| "loss": 0.759265661239624, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.2060518731988472, | |
| "grad_norm": 0.798107385635376, | |
| "learning_rate": 7.477102362842099e-06, | |
| "loss": 0.6458663940429688, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.207492795389049, | |
| "grad_norm": 0.7149136066436768, | |
| "learning_rate": 7.454044547346708e-06, | |
| "loss": 0.7283859252929688, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.2089337175792507, | |
| "grad_norm": 0.6997484564781189, | |
| "learning_rate": 7.431001196887345e-06, | |
| "loss": 0.7165562510490417, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.2103746397694524, | |
| "grad_norm": 0.8367432355880737, | |
| "learning_rate": 7.407972442386527e-06, | |
| "loss": 0.8220187425613403, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.2118155619596542, | |
| "grad_norm": 0.7961417436599731, | |
| "learning_rate": 7.384958414683839e-06, | |
| "loss": 0.6605899930000305, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.2132564841498559, | |
| "grad_norm": 0.8134687542915344, | |
| "learning_rate": 7.361959244535199e-06, | |
| "loss": 0.7518759965896606, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.2146974063400577, | |
| "grad_norm": 0.8612692356109619, | |
| "learning_rate": 7.338975062612115e-06, | |
| "loss": 0.8546530604362488, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.2161383285302594, | |
| "grad_norm": 0.7533084154129028, | |
| "learning_rate": 7.316005999500924e-06, | |
| "loss": 0.6535155773162842, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.217579250720461, | |
| "grad_norm": 0.756385862827301, | |
| "learning_rate": 7.293052185702079e-06, | |
| "loss": 0.7160431742668152, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.219020172910663, | |
| "grad_norm": 0.7518402934074402, | |
| "learning_rate": 7.270113751629388e-06, | |
| "loss": 0.771348237991333, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.2204610951008645, | |
| "grad_norm": 0.7385017275810242, | |
| "learning_rate": 7.247190827609273e-06, | |
| "loss": 0.7204279899597168, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.2219020172910664, | |
| "grad_norm": 0.7346227765083313, | |
| "learning_rate": 7.224283543880041e-06, | |
| "loss": 0.678016722202301, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.223342939481268, | |
| "grad_norm": 0.7345762252807617, | |
| "learning_rate": 7.201392030591137e-06, | |
| "loss": 0.8019239902496338, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.2247838616714697, | |
| "grad_norm": 0.8108334541320801, | |
| "learning_rate": 7.178516417802399e-06, | |
| "loss": 0.7505182027816772, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.2262247838616716, | |
| "grad_norm": 0.7021329998970032, | |
| "learning_rate": 7.155656835483331e-06, | |
| "loss": 0.8881153464317322, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.2276657060518732, | |
| "grad_norm": 0.6703478097915649, | |
| "learning_rate": 7.132813413512361e-06, | |
| "loss": 0.6785498857498169, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.2291066282420748, | |
| "grad_norm": 0.6962404251098633, | |
| "learning_rate": 7.10998628167609e-06, | |
| "loss": 0.7082129120826721, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.2305475504322767, | |
| "grad_norm": 0.7256265878677368, | |
| "learning_rate": 7.087175569668576e-06, | |
| "loss": 0.5700943470001221, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.2319884726224783, | |
| "grad_norm": 0.6702523827552795, | |
| "learning_rate": 7.064381407090584e-06, | |
| "loss": 0.5819450616836548, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.23342939481268, | |
| "grad_norm": 0.7409508228302002, | |
| "learning_rate": 7.041603923448847e-06, | |
| "loss": 0.8606373071670532, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.2348703170028819, | |
| "grad_norm": 0.7683098316192627, | |
| "learning_rate": 7.018843248155345e-06, | |
| "loss": 0.7102565765380859, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.2363112391930835, | |
| "grad_norm": 0.8579858541488647, | |
| "learning_rate": 6.996099510526546e-06, | |
| "loss": 0.7700096368789673, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.2377521613832854, | |
| "grad_norm": 0.7399595975875854, | |
| "learning_rate": 6.973372839782699e-06, | |
| "loss": 0.7888767123222351, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.239193083573487, | |
| "grad_norm": 0.788192868232727, | |
| "learning_rate": 6.950663365047083e-06, | |
| "loss": 0.824313759803772, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.2406340057636887, | |
| "grad_norm": 0.7066922187805176, | |
| "learning_rate": 6.927971215345271e-06, | |
| "loss": 0.7467577457427979, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.2420749279538905, | |
| "grad_norm": 0.7298476099967957, | |
| "learning_rate": 6.905296519604407e-06, | |
| "loss": 0.7883299589157104, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.2435158501440922, | |
| "grad_norm": 0.7065430283546448, | |
| "learning_rate": 6.8826394066524695e-06, | |
| "loss": 0.7581709623336792, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.244956772334294, | |
| "grad_norm": 0.759701669216156, | |
| "learning_rate": 6.860000005217533e-06, | |
| "loss": 0.7661190629005432, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.2463976945244957, | |
| "grad_norm": 0.7207411527633667, | |
| "learning_rate": 6.837378443927052e-06, | |
| "loss": 0.6200900077819824, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.2478386167146973, | |
| "grad_norm": 0.7673540711402893, | |
| "learning_rate": 6.814774851307118e-06, | |
| "loss": 0.7384034395217896, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.2492795389048992, | |
| "grad_norm": 0.7653612494468689, | |
| "learning_rate": 6.7921893557817246e-06, | |
| "loss": 0.6381803750991821, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.2507204610951008, | |
| "grad_norm": 0.6993824243545532, | |
| "learning_rate": 6.769622085672054e-06, | |
| "loss": 0.7953975200653076, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.2521613832853027, | |
| "grad_norm": 0.8462424278259277, | |
| "learning_rate": 6.747073169195739e-06, | |
| "loss": 0.8438471555709839, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.2536023054755043, | |
| "grad_norm": 0.728303074836731, | |
| "learning_rate": 6.724542734466127e-06, | |
| "loss": 0.7537517547607422, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.255043227665706, | |
| "grad_norm": 0.8953651189804077, | |
| "learning_rate": 6.70203090949157e-06, | |
| "loss": 0.6626983880996704, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.2564841498559078, | |
| "grad_norm": 0.8448413014411926, | |
| "learning_rate": 6.679537822174682e-06, | |
| "loss": 0.7634541988372803, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.2579250720461095, | |
| "grad_norm": 0.6933168172836304, | |
| "learning_rate": 6.657063600311616e-06, | |
| "loss": 0.728123128414154, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.2593659942363113, | |
| "grad_norm": 0.728682279586792, | |
| "learning_rate": 6.634608371591343e-06, | |
| "loss": 0.7198150157928467, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.260806916426513, | |
| "grad_norm": 0.7677860260009766, | |
| "learning_rate": 6.6121722635949244e-06, | |
| "loss": 0.7593638896942139, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.2622478386167146, | |
| "grad_norm": 0.8202926516532898, | |
| "learning_rate": 6.58975540379478e-06, | |
| "loss": 0.7541650533676147, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.2636887608069165, | |
| "grad_norm": 0.7458558678627014, | |
| "learning_rate": 6.567357919553973e-06, | |
| "loss": 0.7063798904418945, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.2651296829971181, | |
| "grad_norm": 0.7055810689926147, | |
| "learning_rate": 6.544979938125485e-06, | |
| "loss": 0.6888713240623474, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.26657060518732, | |
| "grad_norm": 0.7375597953796387, | |
| "learning_rate": 6.522621586651485e-06, | |
| "loss": 0.7496410012245178, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.2680115273775217, | |
| "grad_norm": 0.8904906511306763, | |
| "learning_rate": 6.5002829921626206e-06, | |
| "loss": 0.7112149000167847, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.2694524495677233, | |
| "grad_norm": 0.7383248209953308, | |
| "learning_rate": 6.477964281577282e-06, | |
| "loss": 0.7184029817581177, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.270893371757925, | |
| "grad_norm": 0.7049366235733032, | |
| "learning_rate": 6.4556655817008895e-06, | |
| "loss": 0.7123745679855347, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.2723342939481268, | |
| "grad_norm": 0.8128007650375366, | |
| "learning_rate": 6.433387019225175e-06, | |
| "loss": 0.7427414059638977, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.2737752161383284, | |
| "grad_norm": 0.8199894428253174, | |
| "learning_rate": 6.411128720727448e-06, | |
| "loss": 0.8018887042999268, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.2752161383285303, | |
| "grad_norm": 0.7976292967796326, | |
| "learning_rate": 6.3888908126699015e-06, | |
| "loss": 0.7540068626403809, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.276657060518732, | |
| "grad_norm": 0.7567126750946045, | |
| "learning_rate": 6.366673421398869e-06, | |
| "loss": 0.6633398532867432, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.2780979827089336, | |
| "grad_norm": 0.9206326007843018, | |
| "learning_rate": 6.344476673144113e-06, | |
| "loss": 0.8141340613365173, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.2795389048991355, | |
| "grad_norm": 0.7636165618896484, | |
| "learning_rate": 6.322300694018122e-06, | |
| "loss": 0.922012448310852, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.280979827089337, | |
| "grad_norm": 0.5804350972175598, | |
| "learning_rate": 6.3001456100153754e-06, | |
| "loss": 0.5236533284187317, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.282420749279539, | |
| "grad_norm": 0.6871946454048157, | |
| "learning_rate": 6.278011547011638e-06, | |
| "loss": 0.6777955293655396, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.2838616714697406, | |
| "grad_norm": 0.9018382430076599, | |
| "learning_rate": 6.255898630763238e-06, | |
| "loss": 0.7229803204536438, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.2853025936599423, | |
| "grad_norm": 0.7636622190475464, | |
| "learning_rate": 6.233806986906367e-06, | |
| "loss": 0.7445772886276245, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.2867435158501441, | |
| "grad_norm": 0.8684889674186707, | |
| "learning_rate": 6.211736740956343e-06, | |
| "loss": 0.7453570365905762, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.2881844380403458, | |
| "grad_norm": 0.7295882105827332, | |
| "learning_rate": 6.189688018306919e-06, | |
| "loss": 0.7675601243972778, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.2896253602305476, | |
| "grad_norm": 0.7328001856803894, | |
| "learning_rate": 6.167660944229561e-06, | |
| "loss": 0.7515370845794678, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.2910662824207493, | |
| "grad_norm": 0.7595508694648743, | |
| "learning_rate": 6.145655643872733e-06, | |
| "loss": 0.567493736743927, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.292507204610951, | |
| "grad_norm": 0.614960253238678, | |
| "learning_rate": 6.123672242261191e-06, | |
| "loss": 0.645517110824585, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.2939481268011528, | |
| "grad_norm": 0.7227185368537903, | |
| "learning_rate": 6.101710864295279e-06, | |
| "loss": 0.7477235794067383, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.2953890489913544, | |
| "grad_norm": 0.8557547330856323, | |
| "learning_rate": 6.0797716347502e-06, | |
| "loss": 0.842688798904419, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.2968299711815563, | |
| "grad_norm": 0.7734098434448242, | |
| "learning_rate": 6.057854678275326e-06, | |
| "loss": 0.7552927732467651, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.298270893371758, | |
| "grad_norm": 0.9069559574127197, | |
| "learning_rate": 6.035960119393483e-06, | |
| "loss": 0.7265192270278931, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.2997118155619596, | |
| "grad_norm": 0.7913199067115784, | |
| "learning_rate": 6.014088082500241e-06, | |
| "loss": 0.8187565803527832, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.3011527377521614, | |
| "grad_norm": 0.7673888802528381, | |
| "learning_rate": 5.9922386918632145e-06, | |
| "loss": 0.6189776659011841, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 1.302593659942363, | |
| "grad_norm": 0.8198954463005066, | |
| "learning_rate": 5.9704120716213435e-06, | |
| "loss": 0.7065783739089966, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.304034582132565, | |
| "grad_norm": 0.7108833193778992, | |
| "learning_rate": 5.948608345784201e-06, | |
| "loss": 0.6191039085388184, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.3054755043227666, | |
| "grad_norm": 0.7285876870155334, | |
| "learning_rate": 5.926827638231289e-06, | |
| "loss": 0.645740270614624, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.3069164265129682, | |
| "grad_norm": 0.7736101150512695, | |
| "learning_rate": 5.905070072711318e-06, | |
| "loss": 0.6827014684677124, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 1.30835734870317, | |
| "grad_norm": 0.72000652551651, | |
| "learning_rate": 5.883335772841523e-06, | |
| "loss": 0.6328073740005493, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.3097982708933718, | |
| "grad_norm": 0.8541474342346191, | |
| "learning_rate": 5.8616248621069545e-06, | |
| "loss": 0.7510843276977539, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 1.3112391930835736, | |
| "grad_norm": 0.801873505115509, | |
| "learning_rate": 5.83993746385977e-06, | |
| "loss": 0.8494454026222229, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.3126801152737753, | |
| "grad_norm": 0.6421008110046387, | |
| "learning_rate": 5.818273701318542e-06, | |
| "loss": 0.6766311526298523, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 1.314121037463977, | |
| "grad_norm": 0.6372893452644348, | |
| "learning_rate": 5.796633697567557e-06, | |
| "loss": 0.6513572931289673, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.3155619596541785, | |
| "grad_norm": 0.7505905032157898, | |
| "learning_rate": 5.77501757555611e-06, | |
| "loss": 0.7472840547561646, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 1.3170028818443804, | |
| "grad_norm": 0.6798507571220398, | |
| "learning_rate": 5.753425458097817e-06, | |
| "loss": 0.6567862629890442, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.318443804034582, | |
| "grad_norm": 0.7536661624908447, | |
| "learning_rate": 5.731857467869902e-06, | |
| "loss": 0.7532881498336792, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.319884726224784, | |
| "grad_norm": 0.8154526352882385, | |
| "learning_rate": 5.710313727412513e-06, | |
| "loss": 0.7319618463516235, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.3213256484149856, | |
| "grad_norm": 0.7455941438674927, | |
| "learning_rate": 5.688794359128018e-06, | |
| "loss": 0.8852958679199219, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 1.3227665706051872, | |
| "grad_norm": 0.691473662853241, | |
| "learning_rate": 5.6672994852803184e-06, | |
| "loss": 0.7115726470947266, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.324207492795389, | |
| "grad_norm": 0.8230622410774231, | |
| "learning_rate": 5.645829227994146e-06, | |
| "loss": 0.7268555164337158, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 1.3256484149855907, | |
| "grad_norm": 0.7255274057388306, | |
| "learning_rate": 5.624383709254363e-06, | |
| "loss": 0.6968173980712891, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.3270893371757926, | |
| "grad_norm": 0.7870388627052307, | |
| "learning_rate": 5.602963050905296e-06, | |
| "loss": 0.7143142223358154, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 1.3285302593659942, | |
| "grad_norm": 0.753603994846344, | |
| "learning_rate": 5.58156737465001e-06, | |
| "loss": 0.662851095199585, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.3299711815561959, | |
| "grad_norm": 0.7982147336006165, | |
| "learning_rate": 5.560196802049633e-06, | |
| "loss": 0.7520275712013245, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 1.3314121037463977, | |
| "grad_norm": 0.7443618178367615, | |
| "learning_rate": 5.538851454522678e-06, | |
| "loss": 0.6887432932853699, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.3328530259365994, | |
| "grad_norm": 0.732810378074646, | |
| "learning_rate": 5.517531453344327e-06, | |
| "loss": 0.6998310685157776, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.3342939481268012, | |
| "grad_norm": 0.7935830950737, | |
| "learning_rate": 5.496236919645754e-06, | |
| "loss": 0.8417803645133972, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.3357348703170029, | |
| "grad_norm": 0.8099915981292725, | |
| "learning_rate": 5.474967974413451e-06, | |
| "loss": 0.6904634237289429, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 1.3371757925072045, | |
| "grad_norm": 0.803108274936676, | |
| "learning_rate": 5.453724738488511e-06, | |
| "loss": 0.7582980394363403, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.3386167146974064, | |
| "grad_norm": 0.7702426910400391, | |
| "learning_rate": 5.432507332565968e-06, | |
| "loss": 0.7501033544540405, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 1.340057636887608, | |
| "grad_norm": 0.5750554800033569, | |
| "learning_rate": 5.411315877194104e-06, | |
| "loss": 0.46313565969467163, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.34149855907781, | |
| "grad_norm": 0.7295573949813843, | |
| "learning_rate": 5.390150492773749e-06, | |
| "loss": 0.7082580327987671, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 1.3429394812680115, | |
| "grad_norm": 0.8001100420951843, | |
| "learning_rate": 5.369011299557617e-06, | |
| "loss": 0.5968407988548279, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.3443804034582132, | |
| "grad_norm": 0.7285897731781006, | |
| "learning_rate": 5.347898417649609e-06, | |
| "loss": 0.6727792024612427, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 1.345821325648415, | |
| "grad_norm": 0.7374528646469116, | |
| "learning_rate": 5.3268119670041465e-06, | |
| "loss": 0.7759701013565063, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.3472622478386167, | |
| "grad_norm": 0.8536549806594849, | |
| "learning_rate": 5.305752067425469e-06, | |
| "loss": 0.7828449010848999, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.3487031700288186, | |
| "grad_norm": 0.7041170001029968, | |
| "learning_rate": 5.284718838566968e-06, | |
| "loss": 0.8058781623840332, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.3501440922190202, | |
| "grad_norm": 0.7253990173339844, | |
| "learning_rate": 5.26371239993051e-06, | |
| "loss": 0.7985930442810059, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 1.3515850144092219, | |
| "grad_norm": 0.796826183795929, | |
| "learning_rate": 5.242732870865739e-06, | |
| "loss": 0.8930832147598267, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.3530259365994235, | |
| "grad_norm": 0.8277836441993713, | |
| "learning_rate": 5.221780370569415e-06, | |
| "loss": 0.8998521566390991, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 1.3544668587896254, | |
| "grad_norm": 0.6864756941795349, | |
| "learning_rate": 5.2008550180847394e-06, | |
| "loss": 0.65891432762146, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.3559077809798272, | |
| "grad_norm": 0.8034760355949402, | |
| "learning_rate": 5.1799569323006615e-06, | |
| "loss": 0.754687488079071, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 1.3573487031700289, | |
| "grad_norm": 0.7061858773231506, | |
| "learning_rate": 5.159086231951213e-06, | |
| "loss": 0.6347618103027344, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.3587896253602305, | |
| "grad_norm": 0.7044359445571899, | |
| "learning_rate": 5.138243035614842e-06, | |
| "loss": 0.7207168340682983, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 1.3602305475504322, | |
| "grad_norm": 0.795578122138977, | |
| "learning_rate": 5.117427461713724e-06, | |
| "loss": 0.778628945350647, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.361671469740634, | |
| "grad_norm": 0.9205237627029419, | |
| "learning_rate": 5.096639628513092e-06, | |
| "loss": 0.7258281111717224, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.3631123919308357, | |
| "grad_norm": 0.793719470500946, | |
| "learning_rate": 5.0758796541205794e-06, | |
| "loss": 0.9028610587120056, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.3645533141210375, | |
| "grad_norm": 0.803044855594635, | |
| "learning_rate": 5.055147656485526e-06, | |
| "loss": 0.6968406438827515, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 1.3659942363112392, | |
| "grad_norm": 0.8841282725334167, | |
| "learning_rate": 5.034443753398323e-06, | |
| "loss": 0.8499928712844849, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.3674351585014408, | |
| "grad_norm": 0.7515528202056885, | |
| "learning_rate": 5.01376806248975e-06, | |
| "loss": 0.6870205998420715, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 1.3688760806916427, | |
| "grad_norm": 0.9015482068061829, | |
| "learning_rate": 4.993120701230283e-06, | |
| "loss": 0.7434237003326416, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.3703170028818443, | |
| "grad_norm": 0.726290762424469, | |
| "learning_rate": 4.972501786929443e-06, | |
| "loss": 0.7235680818557739, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 1.3717579250720462, | |
| "grad_norm": 0.8368416428565979, | |
| "learning_rate": 4.951911436735142e-06, | |
| "loss": 0.7924642562866211, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.3731988472622478, | |
| "grad_norm": 0.7369707226753235, | |
| "learning_rate": 4.931349767632985e-06, | |
| "loss": 0.7321688532829285, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 1.3746397694524495, | |
| "grad_norm": 0.783839762210846, | |
| "learning_rate": 4.910816896445628e-06, | |
| "loss": 0.7634139060974121, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.3760806916426513, | |
| "grad_norm": 0.7845622301101685, | |
| "learning_rate": 4.890312939832119e-06, | |
| "loss": 0.7176026105880737, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 1.377521613832853, | |
| "grad_norm": 0.7255096435546875, | |
| "learning_rate": 4.869838014287217e-06, | |
| "loss": 0.7216761708259583, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.3789625360230549, | |
| "grad_norm": 0.7795526385307312, | |
| "learning_rate": 4.849392236140734e-06, | |
| "loss": 0.8003466725349426, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 1.3804034582132565, | |
| "grad_norm": 0.7873338460922241, | |
| "learning_rate": 4.828975721556895e-06, | |
| "loss": 0.7114070653915405, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.3818443804034581, | |
| "grad_norm": 0.8338869214057922, | |
| "learning_rate": 4.808588586533646e-06, | |
| "loss": 0.7371832132339478, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 1.38328530259366, | |
| "grad_norm": 0.7012267112731934, | |
| "learning_rate": 4.788230946902015e-06, | |
| "loss": 0.5402542948722839, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.3847262247838616, | |
| "grad_norm": 0.9027784466743469, | |
| "learning_rate": 4.76790291832546e-06, | |
| "loss": 0.7232666015625, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 1.3861671469740635, | |
| "grad_norm": 0.7093715667724609, | |
| "learning_rate": 4.747604616299189e-06, | |
| "loss": 0.6918929815292358, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.3876080691642652, | |
| "grad_norm": 0.716592013835907, | |
| "learning_rate": 4.727336156149516e-06, | |
| "loss": 0.71802818775177, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 1.3890489913544668, | |
| "grad_norm": 0.7714113593101501, | |
| "learning_rate": 4.707097653033219e-06, | |
| "loss": 0.722802996635437, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.3904899135446687, | |
| "grad_norm": 0.7559787034988403, | |
| "learning_rate": 4.686889221936861e-06, | |
| "loss": 0.7283670902252197, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 1.3919308357348703, | |
| "grad_norm": 0.7135756611824036, | |
| "learning_rate": 4.66671097767615e-06, | |
| "loss": 0.6234134435653687, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.3933717579250722, | |
| "grad_norm": 0.799081027507782, | |
| "learning_rate": 4.646563034895293e-06, | |
| "loss": 0.7715392112731934, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 1.3948126801152738, | |
| "grad_norm": 0.7190555930137634, | |
| "learning_rate": 4.626445508066329e-06, | |
| "loss": 0.6489726901054382, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.3962536023054755, | |
| "grad_norm": 0.97360759973526, | |
| "learning_rate": 4.606358511488486e-06, | |
| "loss": 0.7395188808441162, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 1.397694524495677, | |
| "grad_norm": 0.6608725786209106, | |
| "learning_rate": 4.58630215928754e-06, | |
| "loss": 0.6690818667411804, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.399135446685879, | |
| "grad_norm": 0.7637996077537537, | |
| "learning_rate": 4.566276565415152e-06, | |
| "loss": 0.8046863675117493, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 1.4005763688760806, | |
| "grad_norm": 0.8393040299415588, | |
| "learning_rate": 4.5462818436482245e-06, | |
| "loss": 0.825577437877655, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.4020172910662825, | |
| "grad_norm": 0.7651957869529724, | |
| "learning_rate": 4.52631810758827e-06, | |
| "loss": 0.7689692378044128, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 1.4034582132564841, | |
| "grad_norm": 0.8406446576118469, | |
| "learning_rate": 4.506385470660742e-06, | |
| "loss": 0.7642035484313965, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.4048991354466858, | |
| "grad_norm": 0.9124138951301575, | |
| "learning_rate": 4.486484046114403e-06, | |
| "loss": 0.6852501630783081, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.4063400576368876, | |
| "grad_norm": 0.9097302556037903, | |
| "learning_rate": 4.466613947020689e-06, | |
| "loss": 0.7974008917808533, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.4077809798270893, | |
| "grad_norm": 0.7557157874107361, | |
| "learning_rate": 4.4467752862730485e-06, | |
| "loss": 0.7818719148635864, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 1.4092219020172911, | |
| "grad_norm": 0.8131412863731384, | |
| "learning_rate": 4.42696817658631e-06, | |
| "loss": 0.6493479609489441, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.4106628242074928, | |
| "grad_norm": 0.7907167673110962, | |
| "learning_rate": 4.4071927304960534e-06, | |
| "loss": 0.79290771484375, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 1.4121037463976944, | |
| "grad_norm": 0.7542915344238281, | |
| "learning_rate": 4.38744906035795e-06, | |
| "loss": 0.8156715631484985, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.4135446685878963, | |
| "grad_norm": 0.7144057154655457, | |
| "learning_rate": 4.367737278347136e-06, | |
| "loss": 0.7458773851394653, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 1.414985590778098, | |
| "grad_norm": 0.8254581093788147, | |
| "learning_rate": 4.348057496457567e-06, | |
| "loss": 0.6097003221511841, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.4164265129682998, | |
| "grad_norm": 0.8161498308181763, | |
| "learning_rate": 4.328409826501403e-06, | |
| "loss": 0.7463165521621704, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 1.4178674351585014, | |
| "grad_norm": 0.8310127258300781, | |
| "learning_rate": 4.3087943801083445e-06, | |
| "loss": 0.6355860233306885, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.419308357348703, | |
| "grad_norm": 0.8795257210731506, | |
| "learning_rate": 4.289211268725009e-06, | |
| "loss": 0.7873852252960205, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 1.420749279538905, | |
| "grad_norm": 0.6934751272201538, | |
| "learning_rate": 4.269660603614316e-06, | |
| "loss": 0.6793715953826904, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.4221902017291066, | |
| "grad_norm": 0.8139089345932007, | |
| "learning_rate": 4.250142495854825e-06, | |
| "loss": 0.6482336521148682, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 1.4236311239193085, | |
| "grad_norm": 0.812993586063385, | |
| "learning_rate": 4.2306570563401185e-06, | |
| "loss": 0.6544175744056702, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.42507204610951, | |
| "grad_norm": 0.8022162318229675, | |
| "learning_rate": 4.211204395778183e-06, | |
| "loss": 0.7107487916946411, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 1.4265129682997117, | |
| "grad_norm": 0.8545569777488708, | |
| "learning_rate": 4.19178462469076e-06, | |
| "loss": 0.8046406507492065, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.4279538904899136, | |
| "grad_norm": 0.7910804748535156, | |
| "learning_rate": 4.172397853412725e-06, | |
| "loss": 0.7375363707542419, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 1.4293948126801153, | |
| "grad_norm": 0.8032233715057373, | |
| "learning_rate": 4.1530441920914746e-06, | |
| "loss": 0.7059754133224487, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.4308357348703171, | |
| "grad_norm": 0.6835878491401672, | |
| "learning_rate": 4.1337237506862744e-06, | |
| "loss": 0.6616318225860596, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 1.4322766570605188, | |
| "grad_norm": 0.6732160449028015, | |
| "learning_rate": 4.114436638967656e-06, | |
| "loss": 0.5688523054122925, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.4337175792507204, | |
| "grad_norm": 0.8257527351379395, | |
| "learning_rate": 4.095182966516787e-06, | |
| "loss": 0.8635351657867432, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 1.435158501440922, | |
| "grad_norm": 0.8186964392662048, | |
| "learning_rate": 4.075962842724847e-06, | |
| "loss": 0.6884078979492188, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.436599423631124, | |
| "grad_norm": 0.7682012915611267, | |
| "learning_rate": 4.0567763767923965e-06, | |
| "loss": 0.6973609328269958, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 1.4380403458213258, | |
| "grad_norm": 0.7340428233146667, | |
| "learning_rate": 4.037623677728783e-06, | |
| "loss": 0.645268440246582, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.4394812680115274, | |
| "grad_norm": 0.8506885170936584, | |
| "learning_rate": 4.018504854351495e-06, | |
| "loss": 0.639744758605957, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 1.440922190201729, | |
| "grad_norm": 0.7691463828086853, | |
| "learning_rate": 3.999420015285549e-06, | |
| "loss": 0.6750536561012268, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.4423631123919307, | |
| "grad_norm": 0.7501078844070435, | |
| "learning_rate": 3.980369268962893e-06, | |
| "loss": 0.6951167583465576, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 1.4438040345821326, | |
| "grad_norm": 0.6547222137451172, | |
| "learning_rate": 3.961352723621757e-06, | |
| "loss": 0.5897108912467957, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.4452449567723342, | |
| "grad_norm": 0.7267579436302185, | |
| "learning_rate": 3.942370487306064e-06, | |
| "loss": 0.6418097019195557, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 1.446685878962536, | |
| "grad_norm": 0.7520357966423035, | |
| "learning_rate": 3.923422667864814e-06, | |
| "loss": 0.7392733693122864, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.4481268011527377, | |
| "grad_norm": 0.9271366000175476, | |
| "learning_rate": 3.904509372951453e-06, | |
| "loss": 0.7005877494812012, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 1.4495677233429394, | |
| "grad_norm": 0.741805911064148, | |
| "learning_rate": 3.885630710023275e-06, | |
| "loss": 0.7494614124298096, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.4510086455331412, | |
| "grad_norm": 0.7596203684806824, | |
| "learning_rate": 3.866786786340821e-06, | |
| "loss": 0.7183794975280762, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 1.4524495677233429, | |
| "grad_norm": 0.792771577835083, | |
| "learning_rate": 3.847977708967246e-06, | |
| "loss": 0.6995346546173096, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.4538904899135447, | |
| "grad_norm": 0.772834062576294, | |
| "learning_rate": 3.829203584767724e-06, | |
| "loss": 0.6018137335777283, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 1.4553314121037464, | |
| "grad_norm": 0.9559422135353088, | |
| "learning_rate": 3.810464520408853e-06, | |
| "loss": 0.7116073966026306, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.456772334293948, | |
| "grad_norm": 0.7630804777145386, | |
| "learning_rate": 3.7917606223580217e-06, | |
| "loss": 0.737439751625061, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 1.45821325648415, | |
| "grad_norm": 0.7160147428512573, | |
| "learning_rate": 3.7730919968828194e-06, | |
| "loss": 0.6739982962608337, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.4596541786743515, | |
| "grad_norm": 0.9138517379760742, | |
| "learning_rate": 3.754458750050445e-06, | |
| "loss": 0.8231876492500305, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 1.4610951008645534, | |
| "grad_norm": 0.850914716720581, | |
| "learning_rate": 3.7358609877270746e-06, | |
| "loss": 0.7859776020050049, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.462536023054755, | |
| "grad_norm": 0.6578977704048157, | |
| "learning_rate": 3.717298815577284e-06, | |
| "loss": 0.6639118194580078, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.4639769452449567, | |
| "grad_norm": 0.7375325560569763, | |
| "learning_rate": 3.6987723390634447e-06, | |
| "loss": 0.7305494546890259, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.4654178674351586, | |
| "grad_norm": 0.7297279834747314, | |
| "learning_rate": 3.6802816634451144e-06, | |
| "loss": 0.7086485028266907, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 1.4668587896253602, | |
| "grad_norm": 0.6900395154953003, | |
| "learning_rate": 3.661826893778443e-06, | |
| "loss": 0.5996535420417786, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.468299711815562, | |
| "grad_norm": 0.9336727261543274, | |
| "learning_rate": 3.6434081349155903e-06, | |
| "loss": 0.8409576416015625, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 1.4697406340057637, | |
| "grad_norm": 0.8965365290641785, | |
| "learning_rate": 3.6250254915041073e-06, | |
| "loss": 0.8301442861557007, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.4711815561959654, | |
| "grad_norm": 0.7489187717437744, | |
| "learning_rate": 3.6066790679863505e-06, | |
| "loss": 0.6619806289672852, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 1.4726224783861672, | |
| "grad_norm": 0.7200744152069092, | |
| "learning_rate": 3.588368968598903e-06, | |
| "loss": 0.7702663540840149, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.4740634005763689, | |
| "grad_norm": 0.7389686107635498, | |
| "learning_rate": 3.5700952973719573e-06, | |
| "loss": 0.6748791933059692, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 1.4755043227665707, | |
| "grad_norm": 0.9621058106422424, | |
| "learning_rate": 3.551858158128739e-06, | |
| "loss": 0.7804979085922241, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 1.4769452449567724, | |
| "grad_norm": 0.6999828815460205, | |
| "learning_rate": 3.533657654484922e-06, | |
| "loss": 0.7398617267608643, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.478386167146974, | |
| "grad_norm": 0.8494158387184143, | |
| "learning_rate": 3.515493889848025e-06, | |
| "loss": 0.647086501121521, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 1.4798270893371757, | |
| "grad_norm": 0.7813376784324646, | |
| "learning_rate": 3.49736696741683e-06, | |
| "loss": 0.753842830657959, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 1.4812680115273775, | |
| "grad_norm": 0.7741125226020813, | |
| "learning_rate": 3.4792769901808043e-06, | |
| "loss": 0.8448225259780884, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 1.4827089337175792, | |
| "grad_norm": 0.7925018072128296, | |
| "learning_rate": 3.4612240609195034e-06, | |
| "loss": 0.8170247077941895, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 1.484149855907781, | |
| "grad_norm": 0.7917532324790955, | |
| "learning_rate": 3.443208282201994e-06, | |
| "loss": 0.7810318470001221, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.4855907780979827, | |
| "grad_norm": 0.8103862404823303, | |
| "learning_rate": 3.4252297563862625e-06, | |
| "loss": 0.7185397148132324, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 1.4870317002881843, | |
| "grad_norm": 0.8068615198135376, | |
| "learning_rate": 3.407288585618654e-06, | |
| "loss": 0.6962016224861145, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 1.4884726224783862, | |
| "grad_norm": 0.712526261806488, | |
| "learning_rate": 3.3893848718332665e-06, | |
| "loss": 0.6078779101371765, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 1.4899135446685878, | |
| "grad_norm": 0.8054221868515015, | |
| "learning_rate": 3.371518716751383e-06, | |
| "loss": 0.6642535924911499, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 1.4913544668587897, | |
| "grad_norm": 0.735863983631134, | |
| "learning_rate": 3.3536902218809043e-06, | |
| "loss": 0.6583288311958313, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 1.4927953890489913, | |
| "grad_norm": 0.7167906165122986, | |
| "learning_rate": 3.3358994885157537e-06, | |
| "loss": 0.7499520778656006, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 1.494236311239193, | |
| "grad_norm": 0.9229914546012878, | |
| "learning_rate": 3.318146617735306e-06, | |
| "loss": 0.9205317497253418, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 1.4956772334293948, | |
| "grad_norm": 0.7089002728462219, | |
| "learning_rate": 3.3004317104038296e-06, | |
| "loss": 0.7000449299812317, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 1.4971181556195965, | |
| "grad_norm": 0.8045422434806824, | |
| "learning_rate": 3.2827548671698907e-06, | |
| "loss": 0.7404249906539917, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 1.4985590778097984, | |
| "grad_norm": 0.7974978685379028, | |
| "learning_rate": 3.26511618846579e-06, | |
| "loss": 0.6965054273605347, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 0.793845534324646, | |
| "learning_rate": 3.247515774507005e-06, | |
| "loss": 0.6663249731063843, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 1.5014409221902016, | |
| "grad_norm": 0.6912310123443604, | |
| "learning_rate": 3.2299537252915993e-06, | |
| "loss": 0.5732256770133972, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 1.5028818443804035, | |
| "grad_norm": 0.779844343662262, | |
| "learning_rate": 3.2124301405996616e-06, | |
| "loss": 0.6914101839065552, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 1.5043227665706052, | |
| "grad_norm": 0.6761540770530701, | |
| "learning_rate": 3.194945119992755e-06, | |
| "loss": 0.6391370296478271, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 1.505763688760807, | |
| "grad_norm": 0.7634333372116089, | |
| "learning_rate": 3.177498762813327e-06, | |
| "loss": 0.7757022976875305, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 1.5072046109510087, | |
| "grad_norm": 0.9414384961128235, | |
| "learning_rate": 3.160091168184154e-06, | |
| "loss": 0.6397742033004761, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 1.5086455331412103, | |
| "grad_norm": 0.8356814980506897, | |
| "learning_rate": 3.142722435007791e-06, | |
| "loss": 0.749836802482605, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 1.510086455331412, | |
| "grad_norm": 0.8035895228385925, | |
| "learning_rate": 3.1253926619659912e-06, | |
| "loss": 0.7546325922012329, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 1.5115273775216138, | |
| "grad_norm": 0.84291672706604, | |
| "learning_rate": 3.108101947519151e-06, | |
| "loss": 0.759354829788208, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 1.5129682997118157, | |
| "grad_norm": 0.731903076171875, | |
| "learning_rate": 3.0908503899057605e-06, | |
| "loss": 0.6469593048095703, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.5144092219020173, | |
| "grad_norm": 0.7537881731987, | |
| "learning_rate": 3.0736380871418305e-06, | |
| "loss": 0.7219445109367371, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 1.515850144092219, | |
| "grad_norm": 0.8318817019462585, | |
| "learning_rate": 3.0564651370203414e-06, | |
| "loss": 0.7360014915466309, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 1.5172910662824206, | |
| "grad_norm": 0.7886281609535217, | |
| "learning_rate": 3.039331637110697e-06, | |
| "loss": 0.7079243063926697, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 1.5187319884726225, | |
| "grad_norm": 0.8962216973304749, | |
| "learning_rate": 3.0222376847581546e-06, | |
| "loss": 0.7061739563941956, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 1.5201729106628243, | |
| "grad_norm": 0.8324871063232422, | |
| "learning_rate": 3.005183377083277e-06, | |
| "loss": 0.6976668834686279, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 1.521613832853026, | |
| "grad_norm": 0.7693350315093994, | |
| "learning_rate": 2.9881688109813933e-06, | |
| "loss": 0.745376467704773, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 1.5230547550432276, | |
| "grad_norm": 0.7759479880332947, | |
| "learning_rate": 2.971194083122029e-06, | |
| "loss": 0.8127241730690002, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 1.5244956772334293, | |
| "grad_norm": 0.6519967913627625, | |
| "learning_rate": 2.9542592899483633e-06, | |
| "loss": 0.6398651003837585, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 1.5259365994236311, | |
| "grad_norm": 0.7922623753547668, | |
| "learning_rate": 2.937364527676697e-06, | |
| "loss": 0.7102863788604736, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 1.527377521613833, | |
| "grad_norm": 0.8374100923538208, | |
| "learning_rate": 2.920509892295875e-06, | |
| "loss": 0.8550270795822144, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.5288184438040346, | |
| "grad_norm": 0.8438422679901123, | |
| "learning_rate": 2.903695479566774e-06, | |
| "loss": 0.6879276037216187, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 1.5302593659942363, | |
| "grad_norm": 0.857816755771637, | |
| "learning_rate": 2.886921385021729e-06, | |
| "loss": 0.7720720767974854, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 1.531700288184438, | |
| "grad_norm": 0.6902361512184143, | |
| "learning_rate": 2.870187703964017e-06, | |
| "loss": 0.5288726091384888, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 1.5331412103746398, | |
| "grad_norm": 0.8847417235374451, | |
| "learning_rate": 2.8534945314672946e-06, | |
| "loss": 0.648311197757721, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 1.5345821325648417, | |
| "grad_norm": 0.73543781042099, | |
| "learning_rate": 2.8368419623750633e-06, | |
| "loss": 0.7209224104881287, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 1.5360230547550433, | |
| "grad_norm": 0.7391148805618286, | |
| "learning_rate": 2.8202300913001445e-06, | |
| "loss": 0.6820803880691528, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 1.537463976945245, | |
| "grad_norm": 0.7311487793922424, | |
| "learning_rate": 2.8036590126241226e-06, | |
| "loss": 0.7790380120277405, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 1.5389048991354466, | |
| "grad_norm": 0.8042330145835876, | |
| "learning_rate": 2.7871288204968127e-06, | |
| "loss": 0.7293061017990112, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 1.5403458213256485, | |
| "grad_norm": 0.8683137893676758, | |
| "learning_rate": 2.7706396088357444e-06, | |
| "loss": 0.6657461524009705, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 1.54178674351585, | |
| "grad_norm": 0.7877099514007568, | |
| "learning_rate": 2.754191471325601e-06, | |
| "loss": 0.7446212768554688, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.543227665706052, | |
| "grad_norm": 0.7506648302078247, | |
| "learning_rate": 2.737784501417702e-06, | |
| "loss": 0.5783571004867554, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 1.5446685878962536, | |
| "grad_norm": 0.9356808662414551, | |
| "learning_rate": 2.7214187923294766e-06, | |
| "loss": 0.710444986820221, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 1.5461095100864553, | |
| "grad_norm": 0.8530304431915283, | |
| "learning_rate": 2.70509443704392e-06, | |
| "loss": 0.7275018095970154, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 1.547550432276657, | |
| "grad_norm": 0.8602854609489441, | |
| "learning_rate": 2.6888115283090754e-06, | |
| "loss": 0.6969873905181885, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 1.5489913544668588, | |
| "grad_norm": 0.7868731021881104, | |
| "learning_rate": 2.6725701586375075e-06, | |
| "loss": 0.6938682794570923, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.5504322766570606, | |
| "grad_norm": 0.7867501378059387, | |
| "learning_rate": 2.6563704203057704e-06, | |
| "loss": 0.6999156475067139, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 1.5518731988472623, | |
| "grad_norm": 0.8717447519302368, | |
| "learning_rate": 2.6402124053538837e-06, | |
| "loss": 0.7442126274108887, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 1.553314121037464, | |
| "grad_norm": 0.8786858916282654, | |
| "learning_rate": 2.6240962055848196e-06, | |
| "loss": 0.8091111183166504, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 1.5547550432276656, | |
| "grad_norm": 0.8132948875427246, | |
| "learning_rate": 2.6080219125639703e-06, | |
| "loss": 0.6046196222305298, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 1.5561959654178674, | |
| "grad_norm": 0.8643730878829956, | |
| "learning_rate": 2.5919896176186287e-06, | |
| "loss": 0.8511631488800049, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.5576368876080693, | |
| "grad_norm": 0.9204576015472412, | |
| "learning_rate": 2.575999411837481e-06, | |
| "loss": 0.8237127661705017, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 1.559077809798271, | |
| "grad_norm": 0.7882550358772278, | |
| "learning_rate": 2.560051386070073e-06, | |
| "loss": 0.7889937162399292, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 1.5605187319884726, | |
| "grad_norm": 0.8252015113830566, | |
| "learning_rate": 2.5441456309263e-06, | |
| "loss": 0.6393820643424988, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 1.5619596541786742, | |
| "grad_norm": 0.8019078969955444, | |
| "learning_rate": 2.5282822367759054e-06, | |
| "loss": 0.6617242693901062, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 1.563400576368876, | |
| "grad_norm": 0.8571462631225586, | |
| "learning_rate": 2.512461293747942e-06, | |
| "loss": 0.8511845469474792, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 1.564841498559078, | |
| "grad_norm": 0.7433684468269348, | |
| "learning_rate": 2.496682891730279e-06, | |
| "loss": 0.7948633432388306, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 1.5662824207492796, | |
| "grad_norm": 0.8066547513008118, | |
| "learning_rate": 2.480947120369089e-06, | |
| "loss": 0.648466944694519, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 1.5677233429394812, | |
| "grad_norm": 0.816417396068573, | |
| "learning_rate": 2.4652540690683315e-06, | |
| "loss": 0.8480396866798401, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 1.5691642651296829, | |
| "grad_norm": 0.7453494668006897, | |
| "learning_rate": 2.4496038269892455e-06, | |
| "loss": 0.7550395131111145, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 1.5706051873198847, | |
| "grad_norm": 0.8361696004867554, | |
| "learning_rate": 2.433996483049855e-06, | |
| "loss": 0.6834908723831177, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.5720461095100866, | |
| "grad_norm": 0.7857866883277893, | |
| "learning_rate": 2.418432125924449e-06, | |
| "loss": 0.7276380062103271, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 1.5734870317002883, | |
| "grad_norm": 0.7743988037109375, | |
| "learning_rate": 2.4029108440430838e-06, | |
| "loss": 0.7755744457244873, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 1.57492795389049, | |
| "grad_norm": 0.8050707578659058, | |
| "learning_rate": 2.387432725591078e-06, | |
| "loss": 0.8086447715759277, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 1.5763688760806915, | |
| "grad_norm": 0.7724820971488953, | |
| "learning_rate": 2.3719978585085234e-06, | |
| "loss": 0.7475936412811279, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 1.5778097982708934, | |
| "grad_norm": 0.8708832263946533, | |
| "learning_rate": 2.356606330489769e-06, | |
| "loss": 0.6741630434989929, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 1.579250720461095, | |
| "grad_norm": 0.8784914016723633, | |
| "learning_rate": 2.3412582289829254e-06, | |
| "loss": 0.5807492733001709, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 1.580691642651297, | |
| "grad_norm": 0.6939861178398132, | |
| "learning_rate": 2.3259536411893836e-06, | |
| "loss": 0.6853386163711548, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 1.5821325648414986, | |
| "grad_norm": 0.7035260200500488, | |
| "learning_rate": 2.3106926540633e-06, | |
| "loss": 0.6275226473808289, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 1.5835734870317002, | |
| "grad_norm": 0.8285399079322815, | |
| "learning_rate": 2.2954753543111097e-06, | |
| "loss": 0.7287248373031616, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 1.585014409221902, | |
| "grad_norm": 0.828209638595581, | |
| "learning_rate": 2.2803018283910415e-06, | |
| "loss": 0.775260865688324, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.5864553314121037, | |
| "grad_norm": 0.717948853969574, | |
| "learning_rate": 2.2651721625126167e-06, | |
| "loss": 0.5827840566635132, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 1.5878962536023056, | |
| "grad_norm": 0.8269100785255432, | |
| "learning_rate": 2.2500864426361556e-06, | |
| "loss": 0.820456862449646, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 1.5893371757925072, | |
| "grad_norm": 0.7805430293083191, | |
| "learning_rate": 2.23504475447231e-06, | |
| "loss": 0.6738294363021851, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 1.5907780979827089, | |
| "grad_norm": 0.7611426115036011, | |
| "learning_rate": 2.2200471834815497e-06, | |
| "loss": 0.6646812558174133, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 1.5922190201729105, | |
| "grad_norm": 0.7774697542190552, | |
| "learning_rate": 2.2050938148736934e-06, | |
| "loss": 0.7116397619247437, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 1.5936599423631124, | |
| "grad_norm": 0.8153246641159058, | |
| "learning_rate": 2.1901847336074258e-06, | |
| "loss": 0.7710515260696411, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 1.5951008645533142, | |
| "grad_norm": 0.7410541772842407, | |
| "learning_rate": 2.1753200243898032e-06, | |
| "loss": 0.7078261375427246, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 1.5965417867435159, | |
| "grad_norm": 0.7237730622291565, | |
| "learning_rate": 2.160499771675778e-06, | |
| "loss": 0.706581711769104, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 1.5979827089337175, | |
| "grad_norm": 0.8456921577453613, | |
| "learning_rate": 2.14572405966773e-06, | |
| "loss": 0.7275344133377075, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 1.5994236311239192, | |
| "grad_norm": 0.7417944669723511, | |
| "learning_rate": 2.130992972314965e-06, | |
| "loss": 0.7175461053848267, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.600864553314121, | |
| "grad_norm": 0.7151730060577393, | |
| "learning_rate": 2.1163065933132544e-06, | |
| "loss": 0.6884589791297913, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 1.602305475504323, | |
| "grad_norm": 0.8144704699516296, | |
| "learning_rate": 2.101665006104362e-06, | |
| "loss": 0.750603199005127, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 1.6037463976945245, | |
| "grad_norm": 1.0301843881607056, | |
| "learning_rate": 2.087068293875557e-06, | |
| "loss": 0.6825680732727051, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 1.6051873198847262, | |
| "grad_norm": 0.7837945222854614, | |
| "learning_rate": 2.0725165395591472e-06, | |
| "loss": 0.6988552212715149, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 1.6066282420749278, | |
| "grad_norm": 0.7630927562713623, | |
| "learning_rate": 2.0580098258320167e-06, | |
| "loss": 0.7969825267791748, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 1.6080691642651297, | |
| "grad_norm": 0.90474534034729, | |
| "learning_rate": 2.043548235115139e-06, | |
| "loss": 0.7409637570381165, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 1.6095100864553316, | |
| "grad_norm": 0.879987895488739, | |
| "learning_rate": 2.0291318495731215e-06, | |
| "loss": 0.7136498689651489, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 1.6109510086455332, | |
| "grad_norm": 0.7135412096977234, | |
| "learning_rate": 2.014760751113738e-06, | |
| "loss": 0.6333821415901184, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 1.6123919308357348, | |
| "grad_norm": 0.8625454902648926, | |
| "learning_rate": 2.000435021387457e-06, | |
| "loss": 0.6699397563934326, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 1.6138328530259365, | |
| "grad_norm": 0.9795721173286438, | |
| "learning_rate": 1.9861547417869776e-06, | |
| "loss": 0.7384968996047974, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.6152737752161384, | |
| "grad_norm": 0.7938385605812073, | |
| "learning_rate": 1.9719199934467804e-06, | |
| "loss": 0.7596741914749146, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 1.6167146974063402, | |
| "grad_norm": 0.8295004367828369, | |
| "learning_rate": 1.957730857242649e-06, | |
| "loss": 0.7557300329208374, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 1.6181556195965419, | |
| "grad_norm": 0.8414580821990967, | |
| "learning_rate": 1.943587413791217e-06, | |
| "loss": 0.6814196109771729, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 1.6195965417867435, | |
| "grad_norm": 0.7273333072662354, | |
| "learning_rate": 1.9294897434495196e-06, | |
| "loss": 0.5870025157928467, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 1.6210374639769451, | |
| "grad_norm": 0.7663788199424744, | |
| "learning_rate": 1.915437926314523e-06, | |
| "loss": 0.6649688482284546, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.622478386167147, | |
| "grad_norm": 0.8005634546279907, | |
| "learning_rate": 1.9014320422226707e-06, | |
| "loss": 0.6026031374931335, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 1.6239193083573487, | |
| "grad_norm": 0.7385629415512085, | |
| "learning_rate": 1.8874721707494448e-06, | |
| "loss": 0.6108108162879944, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 1.6253602305475505, | |
| "grad_norm": 0.7913649082183838, | |
| "learning_rate": 1.8735583912088951e-06, | |
| "loss": 0.7508318424224854, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 1.6268011527377522, | |
| "grad_norm": 0.7595199346542358, | |
| "learning_rate": 1.8596907826531962e-06, | |
| "loss": 0.7356204986572266, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 1.6282420749279538, | |
| "grad_norm": 0.8176481127738953, | |
| "learning_rate": 1.8458694238722086e-06, | |
| "loss": 0.6614360213279724, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.6296829971181557, | |
| "grad_norm": 0.7632184028625488, | |
| "learning_rate": 1.8320943933930103e-06, | |
| "loss": 0.70728600025177, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 1.6311239193083573, | |
| "grad_norm": 0.733985960483551, | |
| "learning_rate": 1.818365769479462e-06, | |
| "loss": 0.720178484916687, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 1.6325648414985592, | |
| "grad_norm": 0.7500304579734802, | |
| "learning_rate": 1.8046836301317727e-06, | |
| "loss": 0.688285231590271, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 1.6340057636887608, | |
| "grad_norm": 0.7885209918022156, | |
| "learning_rate": 1.7910480530860363e-06, | |
| "loss": 0.7022037506103516, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 1.6354466858789625, | |
| "grad_norm": 0.7801220417022705, | |
| "learning_rate": 1.7774591158137977e-06, | |
| "loss": 0.6189717054367065, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 1.6368876080691641, | |
| "grad_norm": 0.7880612015724182, | |
| "learning_rate": 1.7639168955216257e-06, | |
| "loss": 0.6936331987380981, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 1.638328530259366, | |
| "grad_norm": 0.7047246694564819, | |
| "learning_rate": 1.7504214691506527e-06, | |
| "loss": 0.8859960436820984, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 1.6397694524495678, | |
| "grad_norm": 0.7471388578414917, | |
| "learning_rate": 1.7369729133761493e-06, | |
| "loss": 0.6911474466323853, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 1.6412103746397695, | |
| "grad_norm": 0.8122578263282776, | |
| "learning_rate": 1.7235713046070935e-06, | |
| "loss": 0.8664177656173706, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 1.6426512968299711, | |
| "grad_norm": 0.8845809102058411, | |
| "learning_rate": 1.7102167189857255e-06, | |
| "loss": 0.7293643951416016, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.6440922190201728, | |
| "grad_norm": 0.7609624266624451, | |
| "learning_rate": 1.6969092323871195e-06, | |
| "loss": 0.5892981886863708, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 1.6455331412103746, | |
| "grad_norm": 0.7141941785812378, | |
| "learning_rate": 1.6836489204187511e-06, | |
| "loss": 0.6005899906158447, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 1.6469740634005765, | |
| "grad_norm": 0.8074778318405151, | |
| "learning_rate": 1.6704358584200809e-06, | |
| "loss": 0.7091890573501587, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 1.6484149855907781, | |
| "grad_norm": 0.7607950568199158, | |
| "learning_rate": 1.6572701214621013e-06, | |
| "loss": 0.6214733719825745, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 1.6498559077809798, | |
| "grad_norm": 0.8153498768806458, | |
| "learning_rate": 1.6441517843469302e-06, | |
| "loss": 0.6415261030197144, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 1.6512968299711814, | |
| "grad_norm": 0.7986946105957031, | |
| "learning_rate": 1.631080921607383e-06, | |
| "loss": 0.6604084968566895, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 1.6527377521613833, | |
| "grad_norm": 0.8780381083488464, | |
| "learning_rate": 1.6180576075065412e-06, | |
| "loss": 0.6568028330802917, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 1.6541786743515852, | |
| "grad_norm": 0.7540323138237, | |
| "learning_rate": 1.6050819160373331e-06, | |
| "loss": 0.7073581218719482, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 1.6556195965417868, | |
| "grad_norm": 0.7875475883483887, | |
| "learning_rate": 1.5921539209221238e-06, | |
| "loss": 0.7660901546478271, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 1.6570605187319885, | |
| "grad_norm": 0.7967971563339233, | |
| "learning_rate": 1.5792736956122801e-06, | |
| "loss": 0.6941828727722168, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.65850144092219, | |
| "grad_norm": 0.804412305355072, | |
| "learning_rate": 1.566441313287762e-06, | |
| "loss": 0.7443846464157104, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 1.659942363112392, | |
| "grad_norm": 0.7768911719322205, | |
| "learning_rate": 1.5536568468567126e-06, | |
| "loss": 0.783721923828125, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 1.6613832853025938, | |
| "grad_norm": 0.8279328942298889, | |
| "learning_rate": 1.5409203689550313e-06, | |
| "loss": 0.7449991703033447, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 1.6628242074927955, | |
| "grad_norm": 0.7922146320343018, | |
| "learning_rate": 1.5282319519459643e-06, | |
| "loss": 0.7389559745788574, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 1.6642651296829971, | |
| "grad_norm": 0.8328949213027954, | |
| "learning_rate": 1.5155916679197057e-06, | |
| "loss": 0.7289194464683533, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 1.6657060518731988, | |
| "grad_norm": 0.782349705696106, | |
| "learning_rate": 1.5029995886929717e-06, | |
| "loss": 0.8883833885192871, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 1.6671469740634006, | |
| "grad_norm": 0.8573653101921082, | |
| "learning_rate": 1.4904557858085967e-06, | |
| "loss": 0.680975079536438, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 1.6685878962536023, | |
| "grad_norm": 1.300133466720581, | |
| "learning_rate": 1.4779603305351397e-06, | |
| "loss": 0.6703798770904541, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 1.6700288184438041, | |
| "grad_norm": 0.8434372544288635, | |
| "learning_rate": 1.4655132938664607e-06, | |
| "loss": 0.6783407926559448, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 1.6714697406340058, | |
| "grad_norm": 0.7665268182754517, | |
| "learning_rate": 1.4531147465213247e-06, | |
| "loss": 0.7479151487350464, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.6729106628242074, | |
| "grad_norm": 0.7832397818565369, | |
| "learning_rate": 1.4407647589430084e-06, | |
| "loss": 0.8307238817214966, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 1.674351585014409, | |
| "grad_norm": 0.803061306476593, | |
| "learning_rate": 1.4284634012988886e-06, | |
| "loss": 0.719711422920227, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 1.675792507204611, | |
| "grad_norm": 0.786881685256958, | |
| "learning_rate": 1.4162107434800422e-06, | |
| "loss": 0.7440253496170044, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 1.6772334293948128, | |
| "grad_norm": 0.8766026496887207, | |
| "learning_rate": 1.4040068551008658e-06, | |
| "loss": 0.7081141471862793, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 1.6786743515850144, | |
| "grad_norm": 0.7960566878318787, | |
| "learning_rate": 1.3918518054986607e-06, | |
| "loss": 0.5892655253410339, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 1.680115273775216, | |
| "grad_norm": 0.7898479104042053, | |
| "learning_rate": 1.3797456637332451e-06, | |
| "loss": 0.7909804582595825, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 1.6815561959654177, | |
| "grad_norm": 0.805218517780304, | |
| "learning_rate": 1.3676884985865735e-06, | |
| "loss": 0.773381233215332, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 1.6829971181556196, | |
| "grad_norm": 0.8499282598495483, | |
| "learning_rate": 1.3556803785623274e-06, | |
| "loss": 0.7352174520492554, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 1.6844380403458215, | |
| "grad_norm": 0.7760320901870728, | |
| "learning_rate": 1.3437213718855347e-06, | |
| "loss": 0.7122522592544556, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 1.685878962536023, | |
| "grad_norm": 0.9259606003761292, | |
| "learning_rate": 1.3318115465021896e-06, | |
| "loss": 0.7463353872299194, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.6873198847262247, | |
| "grad_norm": 0.7640926837921143, | |
| "learning_rate": 1.3199509700788527e-06, | |
| "loss": 0.751839280128479, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 1.6887608069164264, | |
| "grad_norm": 0.6983827948570251, | |
| "learning_rate": 1.3081397100022718e-06, | |
| "loss": 0.6213783025741577, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 1.6902017291066282, | |
| "grad_norm": 0.7470341920852661, | |
| "learning_rate": 1.2963778333790067e-06, | |
| "loss": 0.7670629024505615, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 1.6916426512968301, | |
| "grad_norm": 0.8287291526794434, | |
| "learning_rate": 1.2846654070350372e-06, | |
| "loss": 0.6274840831756592, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 1.6930835734870318, | |
| "grad_norm": 0.8524205684661865, | |
| "learning_rate": 1.2730024975153854e-06, | |
| "loss": 0.7239786386489868, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 1.6945244956772334, | |
| "grad_norm": 0.844444215297699, | |
| "learning_rate": 1.2613891710837467e-06, | |
| "loss": 0.7526017427444458, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.695965417867435, | |
| "grad_norm": 0.9229111671447754, | |
| "learning_rate": 1.249825493722101e-06, | |
| "loss": 0.707030177116394, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 1.697406340057637, | |
| "grad_norm": 0.7492605447769165, | |
| "learning_rate": 1.2383115311303417e-06, | |
| "loss": 0.7857674360275269, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 1.6988472622478388, | |
| "grad_norm": 0.8065267205238342, | |
| "learning_rate": 1.2268473487259124e-06, | |
| "loss": 0.7206966876983643, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 1.7002881844380404, | |
| "grad_norm": 0.6866024136543274, | |
| "learning_rate": 1.2154330116434188e-06, | |
| "loss": 0.6249977350234985, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.701729106628242, | |
| "grad_norm": 0.7971569299697876, | |
| "learning_rate": 1.204068584734267e-06, | |
| "loss": 0.6811063289642334, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 1.7031700288184437, | |
| "grad_norm": 0.8167104721069336, | |
| "learning_rate": 1.1927541325663018e-06, | |
| "loss": 0.6600002646446228, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 1.7046109510086456, | |
| "grad_norm": 0.7842592000961304, | |
| "learning_rate": 1.1814897194234253e-06, | |
| "loss": 0.6734592914581299, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 1.7060518731988472, | |
| "grad_norm": 0.8304277658462524, | |
| "learning_rate": 1.1702754093052415e-06, | |
| "loss": 0.7508211135864258, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 1.707492795389049, | |
| "grad_norm": 0.813774049282074, | |
| "learning_rate": 1.1591112659266934e-06, | |
| "loss": 0.7605842351913452, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 1.7089337175792507, | |
| "grad_norm": 0.7906317114830017, | |
| "learning_rate": 1.1479973527176935e-06, | |
| "loss": 0.7054247260093689, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 1.7103746397694524, | |
| "grad_norm": 0.7082518935203552, | |
| "learning_rate": 1.136933732822768e-06, | |
| "loss": 0.6379563212394714, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 1.7118155619596542, | |
| "grad_norm": 0.7211143970489502, | |
| "learning_rate": 1.125920469100704e-06, | |
| "loss": 0.5920464992523193, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 1.7132564841498559, | |
| "grad_norm": 0.7928754091262817, | |
| "learning_rate": 1.1149576241241788e-06, | |
| "loss": 0.6615055799484253, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 1.7146974063400577, | |
| "grad_norm": 0.6627988219261169, | |
| "learning_rate": 1.104045260179415e-06, | |
| "loss": 0.6191346645355225, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.7161383285302594, | |
| "grad_norm": 0.8548330664634705, | |
| "learning_rate": 1.0931834392658213e-06, | |
| "loss": 0.6938613653182983, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 1.717579250720461, | |
| "grad_norm": 0.806582510471344, | |
| "learning_rate": 1.082372223095647e-06, | |
| "loss": 0.7682313323020935, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 1.7190201729106627, | |
| "grad_norm": 0.7843705415725708, | |
| "learning_rate": 1.0716116730936254e-06, | |
| "loss": 0.6646119356155396, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 1.7204610951008645, | |
| "grad_norm": 0.8586521744728088, | |
| "learning_rate": 1.0609018503966207e-06, | |
| "loss": 0.8926165103912354, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 1.7219020172910664, | |
| "grad_norm": 0.8918136954307556, | |
| "learning_rate": 1.0502428158532952e-06, | |
| "loss": 0.7644927501678467, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 1.723342939481268, | |
| "grad_norm": 0.7381912469863892, | |
| "learning_rate": 1.039634630023747e-06, | |
| "loss": 0.8272491693496704, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 1.7247838616714697, | |
| "grad_norm": 0.7400140166282654, | |
| "learning_rate": 1.0290773531791743e-06, | |
| "loss": 0.706849992275238, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 1.7262247838616713, | |
| "grad_norm": 0.8239127397537231, | |
| "learning_rate": 1.0185710453015374e-06, | |
| "loss": 0.7492132186889648, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 1.7276657060518732, | |
| "grad_norm": 0.7474268078804016, | |
| "learning_rate": 1.0081157660832086e-06, | |
| "loss": 0.6511049270629883, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 1.729106628242075, | |
| "grad_norm": 0.8263754844665527, | |
| "learning_rate": 9.977115749266331e-07, | |
| "loss": 0.6856651902198792, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.7305475504322767, | |
| "grad_norm": 0.8760755658149719, | |
| "learning_rate": 9.87358530944006e-07, | |
| "loss": 0.8096874356269836, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 1.7319884726224783, | |
| "grad_norm": 0.7762811779975891, | |
| "learning_rate": 9.77056692956916e-07, | |
| "loss": 0.7226123809814453, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 1.73342939481268, | |
| "grad_norm": 0.9807335138320923, | |
| "learning_rate": 9.668061194960255e-07, | |
| "loss": 0.76618891954422, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 1.7348703170028819, | |
| "grad_norm": 0.8935138583183289, | |
| "learning_rate": 9.566068688007346e-07, | |
| "loss": 0.6808332204818726, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 1.7363112391930837, | |
| "grad_norm": 0.786347508430481, | |
| "learning_rate": 9.464589988188466e-07, | |
| "loss": 0.758068859577179, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 1.7377521613832854, | |
| "grad_norm": 0.9107068181037903, | |
| "learning_rate": 9.363625672062427e-07, | |
| "loss": 0.6974388957023621, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 1.739193083573487, | |
| "grad_norm": 0.8915838599205017, | |
| "learning_rate": 9.263176313265521e-07, | |
| "loss": 0.7282131314277649, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 1.7406340057636887, | |
| "grad_norm": 0.8899832963943481, | |
| "learning_rate": 9.163242482508306e-07, | |
| "loss": 0.7565990686416626, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 1.7420749279538905, | |
| "grad_norm": 0.7595863342285156, | |
| "learning_rate": 9.06382474757228e-07, | |
| "loss": 0.6799623370170593, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 1.7435158501440924, | |
| "grad_norm": 0.7380494475364685, | |
| "learning_rate": 8.964923673306725e-07, | |
| "loss": 0.6662101149559021, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.744956772334294, | |
| "grad_norm": 0.7743575572967529, | |
| "learning_rate": 8.866539821625519e-07, | |
| "loss": 0.8160727620124817, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 1.7463976945244957, | |
| "grad_norm": 0.8004354238510132, | |
| "learning_rate": 8.76867375150382e-07, | |
| "loss": 0.7559400796890259, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 1.7478386167146973, | |
| "grad_norm": 0.7768638730049133, | |
| "learning_rate": 8.671326018975024e-07, | |
| "loss": 0.705933690071106, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 1.7492795389048992, | |
| "grad_norm": 0.8964418172836304, | |
| "learning_rate": 8.574497177127561e-07, | |
| "loss": 0.7099254131317139, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 1.7507204610951008, | |
| "grad_norm": 0.7826142907142639, | |
| "learning_rate": 8.47818777610172e-07, | |
| "loss": 0.6332671046257019, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 1.7521613832853027, | |
| "grad_norm": 0.860312819480896, | |
| "learning_rate": 8.38239836308653e-07, | |
| "loss": 0.8072866201400757, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 1.7536023054755043, | |
| "grad_norm": 0.7804715037345886, | |
| "learning_rate": 8.287129482316725e-07, | |
| "loss": 0.6959720849990845, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 1.755043227665706, | |
| "grad_norm": 0.865706205368042, | |
| "learning_rate": 8.192381675069561e-07, | |
| "loss": 0.7419095039367676, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.7564841498559076, | |
| "grad_norm": 0.7246455550193787, | |
| "learning_rate": 8.098155479661751e-07, | |
| "loss": 0.6388202905654907, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 1.7579250720461095, | |
| "grad_norm": 0.7382645606994629, | |
| "learning_rate": 8.004451431446503e-07, | |
| "loss": 0.7727887034416199, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.7593659942363113, | |
| "grad_norm": 0.6949777603149414, | |
| "learning_rate": 7.911270062810338e-07, | |
| "loss": 0.6242851614952087, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 1.760806916426513, | |
| "grad_norm": 0.7804027795791626, | |
| "learning_rate": 7.818611903170159e-07, | |
| "loss": 0.7478713989257812, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 1.7622478386167146, | |
| "grad_norm": 0.9338253736495972, | |
| "learning_rate": 7.72647747897024e-07, | |
| "loss": 0.767483115196228, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 1.7636887608069163, | |
| "grad_norm": 0.8870203495025635, | |
| "learning_rate": 7.634867313679172e-07, | |
| "loss": 0.7354853749275208, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 1.7651296829971181, | |
| "grad_norm": 0.8656888604164124, | |
| "learning_rate": 7.543781927786953e-07, | |
| "loss": 0.8114850521087646, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 1.76657060518732, | |
| "grad_norm": 0.7293028831481934, | |
| "learning_rate": 7.453221838802027e-07, | |
| "loss": 0.676995038986206, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 1.7680115273775217, | |
| "grad_norm": 0.8250572085380554, | |
| "learning_rate": 7.363187561248275e-07, | |
| "loss": 0.7541552782058716, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 1.7694524495677233, | |
| "grad_norm": 0.7912562489509583, | |
| "learning_rate": 7.273679606662166e-07, | |
| "loss": 0.7518225908279419, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 1.770893371757925, | |
| "grad_norm": 0.9369992017745972, | |
| "learning_rate": 7.184698483589858e-07, | |
| "loss": 0.6083760261535645, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 1.7723342939481268, | |
| "grad_norm": 0.7918375134468079, | |
| "learning_rate": 7.096244697584221e-07, | |
| "loss": 0.7271479964256287, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.7737752161383287, | |
| "grad_norm": 0.804225742816925, | |
| "learning_rate": 7.008318751202048e-07, | |
| "loss": 0.7801766991615295, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 1.7752161383285303, | |
| "grad_norm": 0.8039699196815491, | |
| "learning_rate": 6.92092114400118e-07, | |
| "loss": 0.8230787515640259, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 1.776657060518732, | |
| "grad_norm": 0.7346729636192322, | |
| "learning_rate": 6.834052372537658e-07, | |
| "loss": 0.6628938913345337, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 1.7780979827089336, | |
| "grad_norm": 0.7379924654960632, | |
| "learning_rate": 6.747712930362848e-07, | |
| "loss": 0.6801489591598511, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 1.7795389048991355, | |
| "grad_norm": 0.8555311560630798, | |
| "learning_rate": 6.661903308020801e-07, | |
| "loss": 0.7245817184448242, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 1.7809798270893373, | |
| "grad_norm": 0.8269429802894592, | |
| "learning_rate": 6.57662399304525e-07, | |
| "loss": 0.632072925567627, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 1.782420749279539, | |
| "grad_norm": 0.6983954906463623, | |
| "learning_rate": 6.491875469956998e-07, | |
| "loss": 0.6843686699867249, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 1.7838616714697406, | |
| "grad_norm": 0.8060896992683411, | |
| "learning_rate": 6.407658220261126e-07, | |
| "loss": 0.65267014503479, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 1.7853025936599423, | |
| "grad_norm": 0.8000738024711609, | |
| "learning_rate": 6.323972722444215e-07, | |
| "loss": 0.6732505559921265, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 1.7867435158501441, | |
| "grad_norm": 0.7847197651863098, | |
| "learning_rate": 6.240819451971658e-07, | |
| "loss": 0.6458503603935242, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.7881844380403458, | |
| "grad_norm": 0.9293048977851868, | |
| "learning_rate": 6.158198881284994e-07, | |
| "loss": 0.8058085441589355, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 1.7896253602305476, | |
| "grad_norm": 0.8233553171157837, | |
| "learning_rate": 6.076111479799162e-07, | |
| "loss": 0.7245039343833923, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 1.7910662824207493, | |
| "grad_norm": 0.8260737061500549, | |
| "learning_rate": 5.994557713899829e-07, | |
| "loss": 0.6901232004165649, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 1.792507204610951, | |
| "grad_norm": 0.9803736209869385, | |
| "learning_rate": 5.913538046940859e-07, | |
| "loss": 0.6991169452667236, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 1.7939481268011528, | |
| "grad_norm": 0.7552391290664673, | |
| "learning_rate": 5.833052939241513e-07, | |
| "loss": 0.7670243978500366, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 1.7953890489913544, | |
| "grad_norm": 0.8138408660888672, | |
| "learning_rate": 5.753102848083924e-07, | |
| "loss": 0.5832873582839966, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 1.7968299711815563, | |
| "grad_norm": 0.7978786826133728, | |
| "learning_rate": 5.673688227710539e-07, | |
| "loss": 0.682740330696106, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 1.798270893371758, | |
| "grad_norm": 0.7594887018203735, | |
| "learning_rate": 5.594809529321443e-07, | |
| "loss": 0.6237415671348572, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 1.7997118155619596, | |
| "grad_norm": 0.9314215779304504, | |
| "learning_rate": 5.516467201071829e-07, | |
| "loss": 0.6325702667236328, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 1.8011527377521612, | |
| "grad_norm": 0.8046096563339233, | |
| "learning_rate": 5.438661688069513e-07, | |
| "loss": 0.6462284326553345, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.802593659942363, | |
| "grad_norm": 0.8647462129592896, | |
| "learning_rate": 5.361393432372319e-07, | |
| "loss": 0.7181062698364258, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 1.804034582132565, | |
| "grad_norm": 0.8170526027679443, | |
| "learning_rate": 5.284662872985602e-07, | |
| "loss": 0.7745201587677002, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 1.8054755043227666, | |
| "grad_norm": 0.9152999520301819, | |
| "learning_rate": 5.208470445859782e-07, | |
| "loss": 0.8000661134719849, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 1.8069164265129682, | |
| "grad_norm": 0.8332073092460632, | |
| "learning_rate": 5.132816583887812e-07, | |
| "loss": 0.7485237121582031, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 1.8083573487031699, | |
| "grad_norm": 0.7533081769943237, | |
| "learning_rate": 5.057701716902764e-07, | |
| "loss": 0.6614536046981812, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 1.8097982708933718, | |
| "grad_norm": 0.7314124703407288, | |
| "learning_rate": 4.983126271675354e-07, | |
| "loss": 0.5718865394592285, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 1.8112391930835736, | |
| "grad_norm": 0.7745943665504456, | |
| "learning_rate": 4.909090671911554e-07, | |
| "loss": 0.6789857149124146, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 1.8126801152737753, | |
| "grad_norm": 0.9485689401626587, | |
| "learning_rate": 4.835595338250155e-07, | |
| "loss": 0.7332895398139954, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 1.814121037463977, | |
| "grad_norm": 0.7740848660469055, | |
| "learning_rate": 4.762640688260356e-07, | |
| "loss": 0.7217935919761658, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 1.8155619596541785, | |
| "grad_norm": 0.7472824454307556, | |
| "learning_rate": 4.690227136439496e-07, | |
| "loss": 0.7193496227264404, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.8170028818443804, | |
| "grad_norm": 0.8185622692108154, | |
| "learning_rate": 4.618355094210547e-07, | |
| "loss": 0.7183260917663574, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 1.8184438040345823, | |
| "grad_norm": 0.6518324017524719, | |
| "learning_rate": 4.5470249699198667e-07, | |
| "loss": 0.5979580879211426, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 1.819884726224784, | |
| "grad_norm": 0.8009700179100037, | |
| "learning_rate": 4.476237168834929e-07, | |
| "loss": 0.9154891967773438, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 1.8213256484149856, | |
| "grad_norm": 0.814588189125061, | |
| "learning_rate": 4.4059920931418866e-07, | |
| "loss": 0.7386868000030518, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 1.8227665706051872, | |
| "grad_norm": 0.754295289516449, | |
| "learning_rate": 4.336290141943367e-07, | |
| "loss": 0.8517154455184937, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 1.824207492795389, | |
| "grad_norm": 0.9901515245437622, | |
| "learning_rate": 4.267131711256245e-07, | |
| "loss": 0.8675104379653931, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 1.825648414985591, | |
| "grad_norm": 0.815948486328125, | |
| "learning_rate": 4.1985171940092884e-07, | |
| "loss": 0.7671102285385132, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 1.8270893371757926, | |
| "grad_norm": 0.7330636978149414, | |
| "learning_rate": 4.130446980041003e-07, | |
| "loss": 0.6249233484268188, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 1.8285302593659942, | |
| "grad_norm": 0.8817590475082397, | |
| "learning_rate": 4.0629214560973907e-07, | |
| "loss": 0.7181810140609741, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 1.8299711815561959, | |
| "grad_norm": 0.7729085087776184, | |
| "learning_rate": 3.995941005829773e-07, | |
| "loss": 0.5876595973968506, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.8314121037463977, | |
| "grad_norm": 0.7303892970085144, | |
| "learning_rate": 3.92950600979255e-07, | |
| "loss": 0.6904604434967041, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 1.8328530259365994, | |
| "grad_norm": 0.8344207406044006, | |
| "learning_rate": 3.863616845441154e-07, | |
| "loss": 0.7644379138946533, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 1.8342939481268012, | |
| "grad_norm": 0.7694862484931946, | |
| "learning_rate": 3.798273887129755e-07, | |
| "loss": 0.6588039398193359, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 1.8357348703170029, | |
| "grad_norm": 0.788310170173645, | |
| "learning_rate": 3.733477506109262e-07, | |
| "loss": 0.708280086517334, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 1.8371757925072045, | |
| "grad_norm": 0.67696613073349, | |
| "learning_rate": 3.669228070525177e-07, | |
| "loss": 0.6759251356124878, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 1.8386167146974062, | |
| "grad_norm": 0.8932890295982361, | |
| "learning_rate": 3.6055259454154334e-07, | |
| "loss": 0.8551055788993835, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 1.840057636887608, | |
| "grad_norm": 0.8879607915878296, | |
| "learning_rate": 3.5423714927084186e-07, | |
| "loss": 0.8530901670455933, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 1.84149855907781, | |
| "grad_norm": 0.7950157523155212, | |
| "learning_rate": 3.4797650712208863e-07, | |
| "loss": 0.5972962379455566, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 1.8429394812680115, | |
| "grad_norm": 0.9025905728340149, | |
| "learning_rate": 3.417707036655882e-07, | |
| "loss": 0.7006347179412842, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 1.8443804034582132, | |
| "grad_norm": 0.7743578553199768, | |
| "learning_rate": 3.356197741600753e-07, | |
| "loss": 0.7719041705131531, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.8458213256484148, | |
| "grad_norm": 0.8957917094230652, | |
| "learning_rate": 3.2952375355251865e-07, | |
| "loss": 0.7189593315124512, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 1.8472622478386167, | |
| "grad_norm": 0.7992256283760071, | |
| "learning_rate": 3.234826764779131e-07, | |
| "loss": 0.6644465923309326, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 1.8487031700288186, | |
| "grad_norm": 0.7817349433898926, | |
| "learning_rate": 3.174965772590866e-07, | |
| "loss": 0.7548238039016724, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 1.8501440922190202, | |
| "grad_norm": 0.8072176575660706, | |
| "learning_rate": 3.1156548990651237e-07, | |
| "loss": 0.6053937673568726, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 1.8515850144092219, | |
| "grad_norm": 1.0495251417160034, | |
| "learning_rate": 3.0568944811810497e-07, | |
| "loss": 0.6877189874649048, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 1.8530259365994235, | |
| "grad_norm": 0.8143266439437866, | |
| "learning_rate": 2.9986848527903347e-07, | |
| "loss": 0.8271951675415039, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 1.8544668587896254, | |
| "grad_norm": 0.8350967764854431, | |
| "learning_rate": 2.9410263446153385e-07, | |
| "loss": 0.6571352481842041, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 1.8559077809798272, | |
| "grad_norm": 0.9221143126487732, | |
| "learning_rate": 2.8839192842471943e-07, | |
| "loss": 0.743099570274353, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 1.8573487031700289, | |
| "grad_norm": 0.7692269682884216, | |
| "learning_rate": 2.827363996143895e-07, | |
| "loss": 0.7288011312484741, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 1.8587896253602305, | |
| "grad_norm": 0.8214716911315918, | |
| "learning_rate": 2.771360801628575e-07, | |
| "loss": 0.6358669996261597, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.8602305475504322, | |
| "grad_norm": 0.7842360734939575, | |
| "learning_rate": 2.7159100188875355e-07, | |
| "loss": 0.7078214883804321, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 1.861671469740634, | |
| "grad_norm": 0.7359623312950134, | |
| "learning_rate": 2.6610119629685517e-07, | |
| "loss": 0.6269357204437256, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 1.8631123919308359, | |
| "grad_norm": 0.8186429142951965, | |
| "learning_rate": 2.606666945779024e-07, | |
| "loss": 0.7744366526603699, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 1.8645533141210375, | |
| "grad_norm": 0.5552643537521362, | |
| "learning_rate": 2.552875276084232e-07, | |
| "loss": 0.4451786279678345, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 1.8659942363112392, | |
| "grad_norm": 0.779941976070404, | |
| "learning_rate": 2.4996372595055605e-07, | |
| "loss": 0.7336533069610596, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 1.8674351585014408, | |
| "grad_norm": 0.9156535863876343, | |
| "learning_rate": 2.446953198518776e-07, | |
| "loss": 0.7989022135734558, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 1.8688760806916427, | |
| "grad_norm": 0.806794285774231, | |
| "learning_rate": 2.394823392452306e-07, | |
| "loss": 0.7164968252182007, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 1.8703170028818443, | |
| "grad_norm": 0.793441116809845, | |
| "learning_rate": 2.343248137485532e-07, | |
| "loss": 0.6987060308456421, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 1.8717579250720462, | |
| "grad_norm": 0.7144315838813782, | |
| "learning_rate": 2.2922277266471226e-07, | |
| "loss": 0.683693528175354, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 1.8731988472622478, | |
| "grad_norm": 0.788743257522583, | |
| "learning_rate": 2.241762449813345e-07, | |
| "loss": 0.7102502584457397, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.8746397694524495, | |
| "grad_norm": 0.955685555934906, | |
| "learning_rate": 2.191852593706456e-07, | |
| "loss": 0.6945414543151855, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 1.8760806916426513, | |
| "grad_norm": 0.8227760195732117, | |
| "learning_rate": 2.142498441893004e-07, | |
| "loss": 0.7690091133117676, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 1.877521613832853, | |
| "grad_norm": 0.9441574811935425, | |
| "learning_rate": 2.0937002747823067e-07, | |
| "loss": 0.7788715362548828, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 1.8789625360230549, | |
| "grad_norm": 0.7637494802474976, | |
| "learning_rate": 2.0454583696247864e-07, | |
| "loss": 0.6455879807472229, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 1.8804034582132565, | |
| "grad_norm": 0.7609235644340515, | |
| "learning_rate": 1.997773000510428e-07, | |
| "loss": 0.7027079463005066, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 1.8818443804034581, | |
| "grad_norm": 0.8110032677650452, | |
| "learning_rate": 1.9506444383672328e-07, | |
| "loss": 0.6902576684951782, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 1.8832853025936598, | |
| "grad_norm": 0.8210185766220093, | |
| "learning_rate": 1.9040729509596235e-07, | |
| "loss": 0.6750905513763428, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 1.8847262247838616, | |
| "grad_norm": 0.965930163860321, | |
| "learning_rate": 1.8580588028869972e-07, | |
| "loss": 0.6262344121932983, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 1.8861671469740635, | |
| "grad_norm": 0.7677191495895386, | |
| "learning_rate": 1.8126022555821742e-07, | |
| "loss": 0.6965380907058716, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 1.8876080691642652, | |
| "grad_norm": 0.795842170715332, | |
| "learning_rate": 1.7677035673099196e-07, | |
| "loss": 0.6852426528930664, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.8890489913544668, | |
| "grad_norm": 0.829835832118988, | |
| "learning_rate": 1.7233629931654782e-07, | |
| "loss": 0.7562562227249146, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 1.8904899135446684, | |
| "grad_norm": 0.8017676472663879, | |
| "learning_rate": 1.6795807850731428e-07, | |
| "loss": 0.6990101337432861, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 1.8919308357348703, | |
| "grad_norm": 0.8548034429550171, | |
| "learning_rate": 1.6363571917847875e-07, | |
| "loss": 0.7861789464950562, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 1.8933717579250722, | |
| "grad_norm": 0.8647658228874207, | |
| "learning_rate": 1.593692458878482e-07, | |
| "loss": 0.8342926502227783, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 1.8948126801152738, | |
| "grad_norm": 0.8461260795593262, | |
| "learning_rate": 1.5515868287571124e-07, | |
| "loss": 0.7062436938285828, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 1.8962536023054755, | |
| "grad_norm": 0.7481532096862793, | |
| "learning_rate": 1.5100405406469508e-07, | |
| "loss": 0.8491038084030151, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 1.897694524495677, | |
| "grad_norm": 0.846656084060669, | |
| "learning_rate": 1.469053830596323e-07, | |
| "loss": 0.7018347978591919, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 1.899135446685879, | |
| "grad_norm": 0.8599597811698914, | |
| "learning_rate": 1.4286269314743085e-07, | |
| "loss": 0.729584813117981, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 1.9005763688760808, | |
| "grad_norm": 0.7672188878059387, | |
| "learning_rate": 1.3887600729693307e-07, | |
| "loss": 0.6782927513122559, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 1.9020172910662825, | |
| "grad_norm": 0.6663503050804138, | |
| "learning_rate": 1.3494534815879257e-07, | |
| "loss": 0.6440367102622986, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.9034582132564841, | |
| "grad_norm": 0.824517011642456, | |
| "learning_rate": 1.310707380653442e-07, | |
| "loss": 0.6864629983901978, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 1.9048991354466858, | |
| "grad_norm": 0.7598153948783875, | |
| "learning_rate": 1.2725219903047425e-07, | |
| "loss": 0.7515442371368408, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 1.9063400576368876, | |
| "grad_norm": 0.9752185940742493, | |
| "learning_rate": 1.2348975274949605e-07, | |
| "loss": 0.8176276683807373, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 1.9077809798270895, | |
| "grad_norm": 0.7803353667259216, | |
| "learning_rate": 1.1978342059902892e-07, | |
| "loss": 0.6281642913818359, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 1.9092219020172911, | |
| "grad_norm": 0.8944565057754517, | |
| "learning_rate": 1.161332236368784e-07, | |
| "loss": 0.7009260654449463, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 1.9106628242074928, | |
| "grad_norm": 0.7747707366943359, | |
| "learning_rate": 1.1253918260190844e-07, | |
| "loss": 0.7126386761665344, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 1.9121037463976944, | |
| "grad_norm": 0.7823174595832825, | |
| "learning_rate": 1.0900131791393265e-07, | |
| "loss": 0.688586950302124, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 1.9135446685878963, | |
| "grad_norm": 0.8464524149894714, | |
| "learning_rate": 1.0551964967359441e-07, | |
| "loss": 0.8571232557296753, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 1.914985590778098, | |
| "grad_norm": 0.8994123935699463, | |
| "learning_rate": 1.0209419766225026e-07, | |
| "loss": 0.819856584072113, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 1.9164265129682998, | |
| "grad_norm": 0.7324085831642151, | |
| "learning_rate": 9.872498134186115e-08, | |
| "loss": 0.6851845979690552, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.9178674351585014, | |
| "grad_norm": 0.798224925994873, | |
| "learning_rate": 9.541201985488358e-08, | |
| "loss": 0.7378122210502625, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 1.919308357348703, | |
| "grad_norm": 0.7607042789459229, | |
| "learning_rate": 9.215533202415306e-08, | |
| "loss": 0.7003333568572998, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 1.920749279538905, | |
| "grad_norm": 0.8446595072746277, | |
| "learning_rate": 8.89549363527864e-08, | |
| "loss": 0.6810410022735596, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 1.9221902017291066, | |
| "grad_norm": 0.7857621312141418, | |
| "learning_rate": 8.581085102407072e-08, | |
| "loss": 0.6305010318756104, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 1.9236311239193085, | |
| "grad_norm": 0.8723453879356384, | |
| "learning_rate": 8.272309390136013e-08, | |
| "loss": 0.6027143597602844, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 1.92507204610951, | |
| "grad_norm": 0.8410208821296692, | |
| "learning_rate": 7.96916825279781e-08, | |
| "loss": 0.7244712114334106, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 1.9265129682997117, | |
| "grad_norm": 0.8330152034759521, | |
| "learning_rate": 7.671663412711527e-08, | |
| "loss": 0.810680627822876, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 1.9279538904899134, | |
| "grad_norm": 0.7898954749107361, | |
| "learning_rate": 7.37979656017318e-08, | |
| "loss": 0.759876012802124, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 1.9293948126801153, | |
| "grad_norm": 0.7904804348945618, | |
| "learning_rate": 7.09356935344585e-08, | |
| "loss": 0.6283839344978333, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 1.9308357348703171, | |
| "grad_norm": 0.7238035202026367, | |
| "learning_rate": 6.812983418750917e-08, | |
| "loss": 0.6455138921737671, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.9322766570605188, | |
| "grad_norm": 0.8438715934753418, | |
| "learning_rate": 6.538040350258401e-08, | |
| "loss": 0.8216662406921387, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 1.9337175792507204, | |
| "grad_norm": 0.7600931525230408, | |
| "learning_rate": 6.268741710077741e-08, | |
| "loss": 0.8431342840194702, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 1.935158501440922, | |
| "grad_norm": 0.8402287364006042, | |
| "learning_rate": 6.005089028249366e-08, | |
| "loss": 0.826948881149292, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 1.936599423631124, | |
| "grad_norm": 0.8917995691299438, | |
| "learning_rate": 5.747083802735587e-08, | |
| "loss": 0.7549749612808228, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 1.9380403458213258, | |
| "grad_norm": 0.8245948553085327, | |
| "learning_rate": 5.494727499412489e-08, | |
| "loss": 0.6571122407913208, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 1.9394812680115274, | |
| "grad_norm": 0.7117204070091248, | |
| "learning_rate": 5.2480215520611665e-08, | |
| "loss": 0.7198864817619324, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 1.940922190201729, | |
| "grad_norm": 0.8619735240936279, | |
| "learning_rate": 5.006967362359949e-08, | |
| "loss": 0.7122694253921509, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 1.9423631123919307, | |
| "grad_norm": 0.8721776008605957, | |
| "learning_rate": 4.7715662998760735e-08, | |
| "loss": 0.7027156352996826, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 1.9438040345821326, | |
| "grad_norm": 0.7703850269317627, | |
| "learning_rate": 4.54181970205847e-08, | |
| "loss": 0.6207553148269653, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 1.9452449567723344, | |
| "grad_norm": 0.875476598739624, | |
| "learning_rate": 4.317728874229321e-08, | |
| "loss": 0.6593065857887268, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.946685878962536, | |
| "grad_norm": 0.784439742565155, | |
| "learning_rate": 4.0992950895776265e-08, | |
| "loss": 0.6175656914710999, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 1.9481268011527377, | |
| "grad_norm": 0.8737075924873352, | |
| "learning_rate": 3.8865195891512054e-08, | |
| "loss": 0.8658263087272644, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 1.9495677233429394, | |
| "grad_norm": 0.8800835609436035, | |
| "learning_rate": 3.679403581849927e-08, | |
| "loss": 0.8155512809753418, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 1.9510086455331412, | |
| "grad_norm": 0.7003129124641418, | |
| "learning_rate": 3.477948244418716e-08, | |
| "loss": 0.6445010900497437, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 1.952449567723343, | |
| "grad_norm": 0.8185921311378479, | |
| "learning_rate": 3.2821547214413327e-08, | |
| "loss": 0.687210202217102, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 1.9538904899135447, | |
| "grad_norm": 0.6671662330627441, | |
| "learning_rate": 3.0920241253331596e-08, | |
| "loss": 0.7458325624465942, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 1.9553314121037464, | |
| "grad_norm": 0.86750727891922, | |
| "learning_rate": 2.9075575363355368e-08, | |
| "loss": 0.7063475847244263, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 1.956772334293948, | |
| "grad_norm": 0.6927515864372253, | |
| "learning_rate": 2.728756002508881e-08, | |
| "loss": 0.726108968257904, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 1.95821325648415, | |
| "grad_norm": 0.7644289135932922, | |
| "learning_rate": 2.555620539727799e-08, | |
| "loss": 0.7151061296463013, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 1.9596541786743515, | |
| "grad_norm": 0.8003210425376892, | |
| "learning_rate": 2.388152131674093e-08, | |
| "loss": 0.7040859460830688, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.9610951008645534, | |
| "grad_norm": 0.9617341160774231, | |
| "learning_rate": 2.2263517298320992e-08, | |
| "loss": 0.6567614078521729, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 1.962536023054755, | |
| "grad_norm": 0.8938956260681152, | |
| "learning_rate": 2.070220253483024e-08, | |
| "loss": 0.7761929035186768, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 1.9639769452449567, | |
| "grad_norm": 0.9245355725288391, | |
| "learning_rate": 1.919758589699283e-08, | |
| "loss": 0.7922526597976685, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 1.9654178674351583, | |
| "grad_norm": 0.8674725294113159, | |
| "learning_rate": 1.774967593340171e-08, | |
| "loss": 0.7022340297698975, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 1.9668587896253602, | |
| "grad_norm": 0.8854632377624512, | |
| "learning_rate": 1.635848087046532e-08, | |
| "loss": 0.7119938135147095, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 1.968299711815562, | |
| "grad_norm": 0.7363027930259705, | |
| "learning_rate": 1.5024008612363196e-08, | |
| "loss": 0.636238694190979, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 1.9697406340057637, | |
| "grad_norm": 0.830125629901886, | |
| "learning_rate": 1.3746266740997104e-08, | |
| "loss": 0.6909417510032654, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 1.9711815561959654, | |
| "grad_norm": 0.8109197020530701, | |
| "learning_rate": 1.2525262515954429e-08, | |
| "loss": 0.7618024349212646, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 1.972622478386167, | |
| "grad_norm": 0.8020060062408447, | |
| "learning_rate": 1.1361002874461512e-08, | |
| "loss": 0.8216003179550171, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 1.9740634005763689, | |
| "grad_norm": 0.818276047706604, | |
| "learning_rate": 1.0253494431347045e-08, | |
| "loss": 0.7050395011901855, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.9755043227665707, | |
| "grad_norm": 0.7677318453788757, | |
| "learning_rate": 9.202743479002074e-09, | |
| "loss": 0.6805046200752258, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 1.9769452449567724, | |
| "grad_norm": 0.7596999406814575, | |
| "learning_rate": 8.208755987346717e-09, | |
| "loss": 0.6607006788253784, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 1.978386167146974, | |
| "grad_norm": 0.7905407547950745, | |
| "learning_rate": 7.27153760379462e-09, | |
| "loss": 0.7673226594924927, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 1.9798270893371757, | |
| "grad_norm": 0.7896655201911926, | |
| "learning_rate": 6.391093653224101e-09, | |
| "loss": 0.7531790733337402, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 1.9812680115273775, | |
| "grad_norm": 0.9625368118286133, | |
| "learning_rate": 5.567429137940395e-09, | |
| "loss": 0.6215754151344299, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 1.9827089337175794, | |
| "grad_norm": 0.8169432282447815, | |
| "learning_rate": 4.800548737656785e-09, | |
| "loss": 0.6860508322715759, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 1.984149855907781, | |
| "grad_norm": 0.8397043347358704, | |
| "learning_rate": 4.090456809462407e-09, | |
| "loss": 0.631721556186676, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 1.9855907780979827, | |
| "grad_norm": 0.8882712721824646, | |
| "learning_rate": 3.4371573877944874e-09, | |
| "loss": 0.6776498556137085, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 1.9870317002881843, | |
| "grad_norm": 0.7635476589202881, | |
| "learning_rate": 2.840654184425029e-09, | |
| "loss": 0.6540400981903076, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 1.9884726224783862, | |
| "grad_norm": 0.9316484332084656, | |
| "learning_rate": 2.300950588430828e-09, | |
| "loss": 0.6866999268531799, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.989913544668588, | |
| "grad_norm": 0.802742600440979, | |
| "learning_rate": 1.8180496661779346e-09, | |
| "loss": 0.6650329232215881, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 1.9913544668587897, | |
| "grad_norm": 0.7995099425315857, | |
| "learning_rate": 1.391954161304998e-09, | |
| "loss": 0.7718065977096558, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 1.9927953890489913, | |
| "grad_norm": 0.7001140713691711, | |
| "learning_rate": 1.0226664947032838e-09, | |
| "loss": 0.570642352104187, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 1.994236311239193, | |
| "grad_norm": 0.8440130949020386, | |
| "learning_rate": 7.101887645100114e-10, | |
| "loss": 0.8078399896621704, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 1.9956772334293948, | |
| "grad_norm": 0.8214783072471619, | |
| "learning_rate": 4.54522746090591e-10, | |
| "loss": 0.6115165948867798, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 1.9971181556195965, | |
| "grad_norm": 0.8176844120025635, | |
| "learning_rate": 2.55669892030852e-10, | |
| "loss": 0.7604609727859497, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 1.9985590778097984, | |
| "grad_norm": 0.8071082830429077, | |
| "learning_rate": 1.1363133212705102e-10, | |
| "loss": 0.6580838561058044, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.7433238625526428, | |
| "learning_rate": 2.840787338254103e-11, | |
| "loss": 0.6367690563201904, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 1388, | |
| "total_flos": 1.0037139745307361e+18, | |
| "train_loss": 0.7883986402571374, | |
| "train_runtime": 11184.8694, | |
| "train_samples_per_second": 0.496, | |
| "train_steps_per_second": 0.124 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1388, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0037139745307361e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |