Video-Text-to-Text
Transformers
Safetensors
English
qwen2_5_vl
image-text-to-text
video-understanding
reasoning
multimodal
reinforcement-learning
question-answering
text-generation-inference
Instructions to use Falconss1/VideoThinker-R1-Bias-3B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Falconss1/VideoThinker-R1-Bias-3B with Transformers:
```python
# Load model directly
from transformers import AutoProcessor, AutoModelForMultimodalLM

processor = AutoProcessor.from_pretrained("Falconss1/VideoThinker-R1-Bias-3B")
model = AutoModelForMultimodalLM.from_pretrained("Falconss1/VideoThinker-R1-Bias-3B")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.25, | |
| "eval_steps": 500, | |
| "global_step": 500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0005, | |
| "grad_norm": 6.1962890625, | |
| "learning_rate": 9.995e-07, | |
| "loss": -0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.001, | |
| "grad_norm": 6.744086742401123, | |
| "learning_rate": 9.989999999999999e-07, | |
| "loss": -0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0015, | |
| "grad_norm": 6.945072174072266, | |
| "learning_rate": 9.985e-07, | |
| "loss": 0.0, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.002, | |
| "grad_norm": 6.354312419891357, | |
| "learning_rate": 9.98e-07, | |
| "loss": -0.0, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0025, | |
| "grad_norm": 5.802479267120361, | |
| "learning_rate": 9.975e-07, | |
| "loss": 0.0, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.003, | |
| "grad_norm": 4.5852274894714355, | |
| "learning_rate": 9.97e-07, | |
| "loss": 0.0, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0035, | |
| "grad_norm": 7.049472332000732, | |
| "learning_rate": 9.965e-07, | |
| "loss": 0.0, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.004, | |
| "grad_norm": 21.362648010253906, | |
| "learning_rate": 9.959999999999999e-07, | |
| "loss": -0.0, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0045, | |
| "grad_norm": 5.594510555267334, | |
| "learning_rate": 9.955e-07, | |
| "loss": 0.0, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.005, | |
| "grad_norm": 5.9653730392456055, | |
| "learning_rate": 9.95e-07, | |
| "loss": 0.0, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0055, | |
| "grad_norm": 5.095400333404541, | |
| "learning_rate": 9.945e-07, | |
| "loss": -0.0, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.006, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.94e-07, | |
| "loss": 0.0, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0065, | |
| "grad_norm": 10.911425590515137, | |
| "learning_rate": 9.935e-07, | |
| "loss": -0.0, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.007, | |
| "grad_norm": 9.652170181274414, | |
| "learning_rate": 9.929999999999999e-07, | |
| "loss": 0.0, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0075, | |
| "grad_norm": 6.956664562225342, | |
| "learning_rate": 9.925e-07, | |
| "loss": 0.0, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.008, | |
| "grad_norm": 12.070667266845703, | |
| "learning_rate": 9.92e-07, | |
| "loss": 0.0, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0085, | |
| "grad_norm": 14.007853507995605, | |
| "learning_rate": 9.915e-07, | |
| "loss": 0.0, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.009, | |
| "grad_norm": 4.017375469207764, | |
| "learning_rate": 9.91e-07, | |
| "loss": 0.0, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0095, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.905e-07, | |
| "loss": 0.0, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 6.546974182128906, | |
| "learning_rate": 9.9e-07, | |
| "loss": 0.0, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0105, | |
| "grad_norm": 7.551206588745117, | |
| "learning_rate": 9.895e-07, | |
| "loss": -0.0, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.011, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.89e-07, | |
| "loss": 0.0, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.0115, | |
| "grad_norm": 6.233001232147217, | |
| "learning_rate": 9.885e-07, | |
| "loss": -0.0, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.012, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.88e-07, | |
| "loss": 0.0, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.0125, | |
| "grad_norm": 7.307622909545898, | |
| "learning_rate": 9.875e-07, | |
| "loss": -0.0, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.013, | |
| "grad_norm": 5.898115158081055, | |
| "learning_rate": 9.87e-07, | |
| "loss": -0.0, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.0135, | |
| "grad_norm": 8.286269187927246, | |
| "learning_rate": 9.865e-07, | |
| "loss": 0.0, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.014, | |
| "grad_norm": 9.178420066833496, | |
| "learning_rate": 9.86e-07, | |
| "loss": 0.0, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.0145, | |
| "grad_norm": 7.090274810791016, | |
| "learning_rate": 9.855e-07, | |
| "loss": 0.0, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.015, | |
| "grad_norm": 10.001739501953125, | |
| "learning_rate": 9.849999999999999e-07, | |
| "loss": 0.0, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0155, | |
| "grad_norm": 8.978482246398926, | |
| "learning_rate": 9.845e-07, | |
| "loss": 0.0, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 8.083369255065918, | |
| "learning_rate": 9.84e-07, | |
| "loss": -0.0, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0165, | |
| "grad_norm": 9.646997451782227, | |
| "learning_rate": 9.835e-07, | |
| "loss": 0.0, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.017, | |
| "grad_norm": 6.892234802246094, | |
| "learning_rate": 9.83e-07, | |
| "loss": 0.0, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0175, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.825e-07, | |
| "loss": 0.0, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.018, | |
| "grad_norm": 6.182197570800781, | |
| "learning_rate": 9.819999999999999e-07, | |
| "loss": 0.0, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0185, | |
| "grad_norm": 5.895266532897949, | |
| "learning_rate": 9.815e-07, | |
| "loss": -0.0, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.019, | |
| "grad_norm": 11.212841033935547, | |
| "learning_rate": 9.81e-07, | |
| "loss": -0.0, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.0195, | |
| "grad_norm": 7.982095241546631, | |
| "learning_rate": 9.805e-07, | |
| "loss": 0.0, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 5.73940896987915, | |
| "learning_rate": 9.8e-07, | |
| "loss": 0.0, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0205, | |
| "grad_norm": 8.540511131286621, | |
| "learning_rate": 9.795e-07, | |
| "loss": -0.0, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.021, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.789999999999999e-07, | |
| "loss": 0.0, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0215, | |
| "grad_norm": 8.709277153015137, | |
| "learning_rate": 9.785e-07, | |
| "loss": -0.0, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.022, | |
| "grad_norm": 6.68982458114624, | |
| "learning_rate": 9.78e-07, | |
| "loss": 0.0, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.0225, | |
| "grad_norm": 6.988176345825195, | |
| "learning_rate": 9.775e-07, | |
| "loss": 0.0, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.023, | |
| "grad_norm": 7.0302910804748535, | |
| "learning_rate": 9.77e-07, | |
| "loss": 0.0, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.0235, | |
| "grad_norm": 8.396454811096191, | |
| "learning_rate": 9.765e-07, | |
| "loss": -0.0, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.024, | |
| "grad_norm": 4.7376227378845215, | |
| "learning_rate": 9.759999999999998e-07, | |
| "loss": -0.0, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.0245, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.755e-07, | |
| "loss": 0.0, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.025, | |
| "grad_norm": 6.381641387939453, | |
| "learning_rate": 9.75e-07, | |
| "loss": 0.0, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0255, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.745e-07, | |
| "loss": 0.0, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.026, | |
| "grad_norm": 8.140380859375, | |
| "learning_rate": 9.74e-07, | |
| "loss": -0.0, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.0265, | |
| "grad_norm": 4.727418899536133, | |
| "learning_rate": 9.735e-07, | |
| "loss": 0.0, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.027, | |
| "grad_norm": 6.386085510253906, | |
| "learning_rate": 9.729999999999998e-07, | |
| "loss": -0.0, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.0275, | |
| "grad_norm": 6.39836311340332, | |
| "learning_rate": 9.725e-07, | |
| "loss": 0.0, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.028, | |
| "grad_norm": 5.749513149261475, | |
| "learning_rate": 9.72e-07, | |
| "loss": -0.0, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.0285, | |
| "grad_norm": 4.699296474456787, | |
| "learning_rate": 9.715e-07, | |
| "loss": -0.0, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.029, | |
| "grad_norm": 8.458806037902832, | |
| "learning_rate": 9.709999999999999e-07, | |
| "loss": -0.0, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.0295, | |
| "grad_norm": 9.1854248046875, | |
| "learning_rate": 9.705e-07, | |
| "loss": -0.0, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 6.844909191131592, | |
| "learning_rate": 9.7e-07, | |
| "loss": 0.0, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0305, | |
| "grad_norm": 33.0734977722168, | |
| "learning_rate": 9.695e-07, | |
| "loss": 0.0, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.031, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.69e-07, | |
| "loss": 0.0, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.0315, | |
| "grad_norm": 7.425229072570801, | |
| "learning_rate": 9.685e-07, | |
| "loss": 0.0, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 9.169403076171875, | |
| "learning_rate": 9.679999999999999e-07, | |
| "loss": -0.0, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.0325, | |
| "grad_norm": 13.490100860595703, | |
| "learning_rate": 9.675e-07, | |
| "loss": 0.0, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.033, | |
| "grad_norm": 7.570629596710205, | |
| "learning_rate": 9.67e-07, | |
| "loss": -0.0, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.0335, | |
| "grad_norm": 5.252549648284912, | |
| "learning_rate": 9.665e-07, | |
| "loss": 0.0, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.034, | |
| "grad_norm": 5.543639183044434, | |
| "learning_rate": 9.66e-07, | |
| "loss": -0.0, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.0345, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.655e-07, | |
| "loss": 0.0, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.035, | |
| "grad_norm": 5.360587120056152, | |
| "learning_rate": 9.649999999999999e-07, | |
| "loss": 0.0, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0355, | |
| "grad_norm": 7.327621936798096, | |
| "learning_rate": 9.645e-07, | |
| "loss": 0.0, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.036, | |
| "grad_norm": 9.594143867492676, | |
| "learning_rate": 9.64e-07, | |
| "loss": 0.0, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.0365, | |
| "grad_norm": 5.346116065979004, | |
| "learning_rate": 9.635e-07, | |
| "loss": 0.0, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.037, | |
| "grad_norm": 5.963859558105469, | |
| "learning_rate": 9.63e-07, | |
| "loss": 0.0, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.0375, | |
| "grad_norm": 7.078248023986816, | |
| "learning_rate": 9.624999999999999e-07, | |
| "loss": 0.0, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.038, | |
| "grad_norm": 5.854560375213623, | |
| "learning_rate": 9.619999999999999e-07, | |
| "loss": 0.0, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.0385, | |
| "grad_norm": 8.13651180267334, | |
| "learning_rate": 9.615e-07, | |
| "loss": -0.0, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.039, | |
| "grad_norm": 8.167058944702148, | |
| "learning_rate": 9.61e-07, | |
| "loss": -0.0, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.0395, | |
| "grad_norm": 5.878276348114014, | |
| "learning_rate": 9.605e-07, | |
| "loss": 0.0, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 12.290175437927246, | |
| "learning_rate": 9.6e-07, | |
| "loss": 0.0, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.0405, | |
| "grad_norm": 4.8677496910095215, | |
| "learning_rate": 9.594999999999999e-07, | |
| "loss": 0.0, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.041, | |
| "grad_norm": 9.993011474609375, | |
| "learning_rate": 9.589999999999998e-07, | |
| "loss": 0.0, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.0415, | |
| "grad_norm": 7.9544477462768555, | |
| "learning_rate": 9.585e-07, | |
| "loss": 0.0, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.042, | |
| "grad_norm": 8.334663391113281, | |
| "learning_rate": 9.58e-07, | |
| "loss": -0.0, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.0425, | |
| "grad_norm": 21.026262283325195, | |
| "learning_rate": 9.575e-07, | |
| "loss": -0.0, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.043, | |
| "grad_norm": 13.211177825927734, | |
| "learning_rate": 9.57e-07, | |
| "loss": 0.0, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.0435, | |
| "grad_norm": 9.141230583190918, | |
| "learning_rate": 9.565e-07, | |
| "loss": 0.0, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.044, | |
| "grad_norm": 7.934508800506592, | |
| "learning_rate": 9.559999999999998e-07, | |
| "loss": -0.0, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.0445, | |
| "grad_norm": 8.56117057800293, | |
| "learning_rate": 9.555e-07, | |
| "loss": 0.0, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.045, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.55e-07, | |
| "loss": 0.0, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0455, | |
| "grad_norm": 15.598448753356934, | |
| "learning_rate": 9.545e-07, | |
| "loss": 0.0, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.046, | |
| "grad_norm": 9.095897674560547, | |
| "learning_rate": 9.539999999999999e-07, | |
| "loss": -0.0, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.0465, | |
| "grad_norm": 4.865746974945068, | |
| "learning_rate": 9.535e-07, | |
| "loss": -0.0, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.047, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.529999999999999e-07, | |
| "loss": 0.0, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.0475, | |
| "grad_norm": 5.1494951248168945, | |
| "learning_rate": 9.525e-07, | |
| "loss": 0.0, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 11.34716510772705, | |
| "learning_rate": 9.52e-07, | |
| "loss": 0.0, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.0485, | |
| "grad_norm": 11.986861228942871, | |
| "learning_rate": 9.515e-07, | |
| "loss": 0.0, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.049, | |
| "grad_norm": 7.944230079650879, | |
| "learning_rate": 9.509999999999999e-07, | |
| "loss": 0.0, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.0495, | |
| "grad_norm": 7.5184783935546875, | |
| "learning_rate": 9.504999999999999e-07, | |
| "loss": -0.0, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 4.20994758605957, | |
| "learning_rate": 9.499999999999999e-07, | |
| "loss": 0.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0505, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.495e-07, | |
| "loss": 0.0, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.051, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.489999999999999e-07, | |
| "loss": 0.0, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.0515, | |
| "grad_norm": 7.179519176483154, | |
| "learning_rate": 9.485e-07, | |
| "loss": -0.0, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.052, | |
| "grad_norm": 8.312400817871094, | |
| "learning_rate": 9.479999999999999e-07, | |
| "loss": 0.0, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.0525, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.474999999999999e-07, | |
| "loss": 0.0, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.053, | |
| "grad_norm": 6.276727676391602, | |
| "learning_rate": 9.469999999999999e-07, | |
| "loss": 0.0, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.0535, | |
| "grad_norm": 6.952809810638428, | |
| "learning_rate": 9.465e-07, | |
| "loss": 0.0, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.054, | |
| "grad_norm": 12.95068645477295, | |
| "learning_rate": 9.459999999999999e-07, | |
| "loss": -0.0, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.0545, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.455e-07, | |
| "loss": 0.0, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.055, | |
| "grad_norm": 13.65576457977295, | |
| "learning_rate": 9.45e-07, | |
| "loss": 0.0, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0555, | |
| "grad_norm": 8.414222717285156, | |
| "learning_rate": 9.444999999999999e-07, | |
| "loss": 0.0, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.056, | |
| "grad_norm": 7.828263759613037, | |
| "learning_rate": 9.439999999999999e-07, | |
| "loss": 0.0, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.0565, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.434999999999999e-07, | |
| "loss": 0.0, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.057, | |
| "grad_norm": 7.849336624145508, | |
| "learning_rate": 9.429999999999999e-07, | |
| "loss": 0.0, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.0575, | |
| "grad_norm": 13.594552993774414, | |
| "learning_rate": 9.425e-07, | |
| "loss": 0.0, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.058, | |
| "grad_norm": 6.633617877960205, | |
| "learning_rate": 9.419999999999999e-07, | |
| "loss": 0.0, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.0585, | |
| "grad_norm": 7.893250942230225, | |
| "learning_rate": 9.415e-07, | |
| "loss": -0.0, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.059, | |
| "grad_norm": 7.897842884063721, | |
| "learning_rate": 9.409999999999999e-07, | |
| "loss": -0.0, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.0595, | |
| "grad_norm": 7.738225936889648, | |
| "learning_rate": 9.404999999999999e-07, | |
| "loss": 0.0, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 10.054285049438477, | |
| "learning_rate": 9.399999999999999e-07, | |
| "loss": 0.0, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0605, | |
| "grad_norm": 6.2317328453063965, | |
| "learning_rate": 9.395e-07, | |
| "loss": 0.0, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.061, | |
| "grad_norm": 7.4707207679748535, | |
| "learning_rate": 9.389999999999999e-07, | |
| "loss": -0.0, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.0615, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.385e-07, | |
| "loss": 0.0, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.062, | |
| "grad_norm": 6.883451461791992, | |
| "learning_rate": 9.379999999999998e-07, | |
| "loss": 0.0, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.0625, | |
| "grad_norm": 5.7558274269104, | |
| "learning_rate": 9.374999999999999e-07, | |
| "loss": 0.0, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.063, | |
| "grad_norm": 4.654928207397461, | |
| "learning_rate": 9.37e-07, | |
| "loss": -0.0, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.0635, | |
| "grad_norm": 13.459746360778809, | |
| "learning_rate": 9.365e-07, | |
| "loss": 0.0, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 6.189227104187012, | |
| "learning_rate": 9.36e-07, | |
| "loss": -0.0, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.0645, | |
| "grad_norm": 15.807933807373047, | |
| "learning_rate": 9.355e-07, | |
| "loss": -0.0, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.065, | |
| "grad_norm": 8.20335865020752, | |
| "learning_rate": 9.35e-07, | |
| "loss": -0.0, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.0655, | |
| "grad_norm": 7.410068511962891, | |
| "learning_rate": 9.344999999999999e-07, | |
| "loss": 0.0, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.066, | |
| "grad_norm": 5.982290744781494, | |
| "learning_rate": 9.34e-07, | |
| "loss": 0.0, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.0665, | |
| "grad_norm": 7.302867889404297, | |
| "learning_rate": 9.334999999999999e-07, | |
| "loss": 0.0, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.067, | |
| "grad_norm": 7.16635799407959, | |
| "learning_rate": 9.33e-07, | |
| "loss": 0.0, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.0675, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.325e-07, | |
| "loss": 0.0, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.068, | |
| "grad_norm": 5.66601037979126, | |
| "learning_rate": 9.32e-07, | |
| "loss": -0.0, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.0685, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.315e-07, | |
| "loss": 0.0, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.069, | |
| "grad_norm": 12.146499633789062, | |
| "learning_rate": 9.31e-07, | |
| "loss": -0.0, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.0695, | |
| "grad_norm": 6.333805084228516, | |
| "learning_rate": 9.304999999999999e-07, | |
| "loss": 0.0, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 17.41741943359375, | |
| "learning_rate": 9.3e-07, | |
| "loss": -0.0, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.0705, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.295e-07, | |
| "loss": 0.0, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.071, | |
| "grad_norm": 18.96269989013672, | |
| "learning_rate": 9.29e-07, | |
| "loss": 0.0, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.0715, | |
| "grad_norm": 30.19170570373535, | |
| "learning_rate": 9.285e-07, | |
| "loss": 0.0, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.072, | |
| "grad_norm": 12.67878532409668, | |
| "learning_rate": 9.28e-07, | |
| "loss": -0.0, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.0725, | |
| "grad_norm": 16.92245101928711, | |
| "learning_rate": 9.274999999999999e-07, | |
| "loss": 0.0, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.073, | |
| "grad_norm": 8.775379180908203, | |
| "learning_rate": 9.27e-07, | |
| "loss": 0.0, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.0735, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.264999999999999e-07, | |
| "loss": 0.0, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.074, | |
| "grad_norm": 12.122485160827637, | |
| "learning_rate": 9.26e-07, | |
| "loss": 0.0, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.0745, | |
| "grad_norm": 41.2854118347168, | |
| "learning_rate": 9.255e-07, | |
| "loss": 0.0, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.075, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.25e-07, | |
| "loss": 0.0, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0755, | |
| "grad_norm": 12.417732238769531, | |
| "learning_rate": 9.244999999999999e-07, | |
| "loss": 0.0, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.076, | |
| "grad_norm": 23.242403030395508, | |
| "learning_rate": 9.24e-07, | |
| "loss": 0.0, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.0765, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.234999999999999e-07, | |
| "loss": 0.0, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.077, | |
| "grad_norm": 8.696711540222168, | |
| "learning_rate": 9.23e-07, | |
| "loss": -0.0, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.0775, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.225e-07, | |
| "loss": 0.0, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.078, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.22e-07, | |
| "loss": 0.0, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.0785, | |
| "grad_norm": 12.881440162658691, | |
| "learning_rate": 9.215e-07, | |
| "loss": -0.0, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.079, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.21e-07, | |
| "loss": 0.0, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.0795, | |
| "grad_norm": 21.86204719543457, | |
| "learning_rate": 9.204999999999999e-07, | |
| "loss": 0.0, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 16.32013702392578, | |
| "learning_rate": 9.2e-07, | |
| "loss": -0.0, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.0805, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.194999999999999e-07, | |
| "loss": 0.0, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.081, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.19e-07, | |
| "loss": 0.0, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.0815, | |
| "grad_norm": 21.536087036132812, | |
| "learning_rate": 9.185e-07, | |
| "loss": 0.0, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.082, | |
| "grad_norm": 15.687423706054688, | |
| "learning_rate": 9.18e-07, | |
| "loss": 0.0, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.0825, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.174999999999999e-07, | |
| "loss": 0.0, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.083, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.17e-07, | |
| "loss": 0.0, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.0835, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.164999999999999e-07, | |
| "loss": 0.0, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.084, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.16e-07, | |
| "loss": 0.0, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.0845, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.155e-07, | |
| "loss": 0.0, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.085, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.15e-07, | |
| "loss": 0.0, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.0855, | |
| "grad_norm": 25.705774307250977, | |
| "learning_rate": 9.145e-07, | |
| "loss": -0.0, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.086, | |
| "grad_norm": 21.59645652770996, | |
| "learning_rate": 9.14e-07, | |
| "loss": -0.0, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.0865, | |
| "grad_norm": 10.857905387878418, | |
| "learning_rate": 9.134999999999999e-07, | |
| "loss": -0.0, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.087, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.13e-07, | |
| "loss": 0.0, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.0875, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.124999999999999e-07, | |
| "loss": 0.0, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.088, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.12e-07, | |
| "loss": 0.0, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.0885, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.115e-07, | |
| "loss": 0.0, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.089, | |
| "grad_norm": 20.786745071411133, | |
| "learning_rate": 9.109999999999999e-07, | |
| "loss": 0.0, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.0895, | |
| "grad_norm": 8.460957527160645, | |
| "learning_rate": 9.104999999999999e-07, | |
| "loss": -0.0, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.1e-07, | |
| "loss": 0.0, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.0905, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.094999999999999e-07, | |
| "loss": 0.0, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.091, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.09e-07, | |
| "loss": 0.0, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.0915, | |
| "grad_norm": 49.33989715576172, | |
| "learning_rate": 9.085e-07, | |
| "loss": -0.0, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.092, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.08e-07, | |
| "loss": 0.0, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.0925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.074999999999999e-07, | |
| "loss": 0.0, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.093, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.07e-07, | |
| "loss": 0.0, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.0935, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.064999999999999e-07, | |
| "loss": 0.0, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.094, | |
| "grad_norm": 16.010793685913086, | |
| "learning_rate": 9.06e-07, | |
| "loss": 0.0, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.0945, | |
| "grad_norm": 17.950115203857422, | |
| "learning_rate": 9.055e-07, | |
| "loss": 0.0, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.095, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.05e-07, | |
| "loss": 0.0, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.0955, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.045e-07, | |
| "loss": 0.0, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 8.419339179992676, | |
| "learning_rate": 9.039999999999999e-07, | |
| "loss": -0.0, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.0965, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.034999999999999e-07, | |
| "loss": 0.0, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.097, | |
| "grad_norm": 17.22492790222168, | |
| "learning_rate": 9.03e-07, | |
| "loss": -0.0, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.0975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.024999999999999e-07, | |
| "loss": 0.0, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.098, | |
| "grad_norm": 15.984553337097168, | |
| "learning_rate": 9.02e-07, | |
| "loss": 0.0, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.0985, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.015e-07, | |
| "loss": 0.0, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.099, | |
| "grad_norm": 11.981531143188477, | |
| "learning_rate": 9.01e-07, | |
| "loss": 0.0, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.0995, | |
| "grad_norm": 0.0, | |
| "learning_rate": 9.004999999999999e-07, | |
| "loss": 0.0, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 16.9019832611084, | |
| "learning_rate": 9e-07, | |
| "loss": -0.0, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1005, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.994999999999999e-07, | |
| "loss": 0.0, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.101, | |
| "grad_norm": 10.651970863342285, | |
| "learning_rate": 8.99e-07, | |
| "loss": 0.0, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.1015, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.985e-07, | |
| "loss": 0.0, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.102, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.98e-07, | |
| "loss": 0.0, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.1025, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.974999999999999e-07, | |
| "loss": 0.0, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.103, | |
| "grad_norm": 33.05813980102539, | |
| "learning_rate": 8.969999999999999e-07, | |
| "loss": 0.0, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.1035, | |
| "grad_norm": 26.88140296936035, | |
| "learning_rate": 8.964999999999999e-07, | |
| "loss": 0.0, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.104, | |
| "grad_norm": 18.670848846435547, | |
| "learning_rate": 8.96e-07, | |
| "loss": -0.0, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.1045, | |
| "grad_norm": 18.841079711914062, | |
| "learning_rate": 8.954999999999999e-07, | |
| "loss": -0.0, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.105, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.95e-07, | |
| "loss": 0.0, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.1055, | |
| "grad_norm": 13.156370162963867, | |
| "learning_rate": 8.945e-07, | |
| "loss": 0.0, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.106, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.939999999999999e-07, | |
| "loss": 0.0, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.1065, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.934999999999999e-07, | |
| "loss": 0.0, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.107, | |
| "grad_norm": 23.25225830078125, | |
| "learning_rate": 8.93e-07, | |
| "loss": 0.0, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.1075, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.924999999999999e-07, | |
| "loss": 0.0, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.108, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.92e-07, | |
| "loss": 0.0, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.1085, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.915e-07, | |
| "loss": 0.0, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.109, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.91e-07, | |
| "loss": 0.0, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.1095, | |
| "grad_norm": 57.88274383544922, | |
| "learning_rate": 8.904999999999999e-07, | |
| "loss": 0.0, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 31.124988555908203, | |
| "learning_rate": 8.9e-07, | |
| "loss": 0.0, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.1105, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.894999999999999e-07, | |
| "loss": 0.0, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.111, | |
| "grad_norm": 22.94927215576172, | |
| "learning_rate": 8.89e-07, | |
| "loss": -0.0, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.1115, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.884999999999999e-07, | |
| "loss": 0.0, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.88e-07, | |
| "loss": 0.0, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.1125, | |
| "grad_norm": 22.883502960205078, | |
| "learning_rate": 8.874999999999999e-07, | |
| "loss": 0.0, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.113, | |
| "grad_norm": 10.071247100830078, | |
| "learning_rate": 8.869999999999999e-07, | |
| "loss": 0.0, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.1135, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.864999999999999e-07, | |
| "loss": 0.0, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.114, | |
| "grad_norm": 231.0457305908203, | |
| "learning_rate": 8.86e-07, | |
| "loss": -0.0, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.1145, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.854999999999999e-07, | |
| "loss": 0.0, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.115, | |
| "grad_norm": 23.97252655029297, | |
| "learning_rate": 8.85e-07, | |
| "loss": 0.0, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1155, | |
| "grad_norm": 15.410896301269531, | |
| "learning_rate": 8.845e-07, | |
| "loss": 0.0, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.116, | |
| "grad_norm": 39.541412353515625, | |
| "learning_rate": 8.839999999999999e-07, | |
| "loss": 0.0, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.1165, | |
| "grad_norm": 13.713851928710938, | |
| "learning_rate": 8.834999999999999e-07, | |
| "loss": 0.0, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.117, | |
| "grad_norm": 35.34727096557617, | |
| "learning_rate": 8.83e-07, | |
| "loss": -0.0, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.1175, | |
| "grad_norm": 45.32273864746094, | |
| "learning_rate": 8.824999999999999e-07, | |
| "loss": 0.0, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.118, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.82e-07, | |
| "loss": 0.0, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.1185, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.814999999999999e-07, | |
| "loss": 0.0, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.119, | |
| "grad_norm": 267.7450256347656, | |
| "learning_rate": 8.81e-07, | |
| "loss": 0.0, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.1195, | |
| "grad_norm": 143.29161071777344, | |
| "learning_rate": 8.804999999999999e-07, | |
| "loss": -0.0, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 52.909034729003906, | |
| "learning_rate": 8.799999999999999e-07, | |
| "loss": -0.0, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.1205, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.794999999999999e-07, | |
| "loss": 0.0, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.121, | |
| "grad_norm": 37.857696533203125, | |
| "learning_rate": 8.79e-07, | |
| "loss": 0.0, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.1215, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.784999999999999e-07, | |
| "loss": 0.0, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.122, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.78e-07, | |
| "loss": 0.0, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.1225, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.774999999999999e-07, | |
| "loss": 0.0, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.123, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.769999999999999e-07, | |
| "loss": 0.0, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.1235, | |
| "grad_norm": 30.24044418334961, | |
| "learning_rate": 8.764999999999999e-07, | |
| "loss": 0.0, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.124, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.76e-07, | |
| "loss": 0.0, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.1245, | |
| "grad_norm": 33.06248092651367, | |
| "learning_rate": 8.754999999999999e-07, | |
| "loss": 0.0, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.125, | |
| "grad_norm": 20.05577278137207, | |
| "learning_rate": 8.75e-07, | |
| "loss": -0.0, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.1255, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.745000000000001e-07, | |
| "loss": 0.0, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.126, | |
| "grad_norm": 18.56123161315918, | |
| "learning_rate": 8.739999999999999e-07, | |
| "loss": 0.0, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.1265, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.735e-07, | |
| "loss": 0.0, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.127, | |
| "grad_norm": 12.27500057220459, | |
| "learning_rate": 8.729999999999999e-07, | |
| "loss": 0.0, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.1275, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.725e-07, | |
| "loss": 0.0, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 53.35928726196289, | |
| "learning_rate": 8.72e-07, | |
| "loss": -0.0, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.1285, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.715e-07, | |
| "loss": 0.0, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.129, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.71e-07, | |
| "loss": 0.0, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.1295, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.705e-07, | |
| "loss": 0.0, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.699999999999999e-07, | |
| "loss": 0.0, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.1305, | |
| "grad_norm": 40.95280838012695, | |
| "learning_rate": 8.695e-07, | |
| "loss": 0.0, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.131, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.69e-07, | |
| "loss": 0.0, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.1315, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.685e-07, | |
| "loss": 0.0, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.132, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.68e-07, | |
| "loss": 0.0, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.1325, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.675000000000001e-07, | |
| "loss": 0.0, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.133, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.669999999999999e-07, | |
| "loss": 0.0, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.1335, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.665e-07, | |
| "loss": 0.0, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.134, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.659999999999999e-07, | |
| "loss": 0.0, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.1345, | |
| "grad_norm": 29.156984329223633, | |
| "learning_rate": 8.655e-07, | |
| "loss": -0.0, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.135, | |
| "grad_norm": 25.566734313964844, | |
| "learning_rate": 8.65e-07, | |
| "loss": 0.0, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.1355, | |
| "grad_norm": 90.18716430664062, | |
| "learning_rate": 8.645e-07, | |
| "loss": 0.0, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.136, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.639999999999999e-07, | |
| "loss": 0.0, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.1365, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.635e-07, | |
| "loss": 0.0, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.137, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.629999999999999e-07, | |
| "loss": 0.0, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.1375, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.625e-07, | |
| "loss": 0.0, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.138, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.62e-07, | |
| "loss": 0.0, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.1385, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.615e-07, | |
| "loss": 0.0, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.139, | |
| "grad_norm": 74.6231460571289, | |
| "learning_rate": 8.61e-07, | |
| "loss": 0.0, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.1395, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.605e-07, | |
| "loss": 0.0, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.599999999999999e-07, | |
| "loss": 0.0, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.1405, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.595e-07, | |
| "loss": 0.0, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.141, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.59e-07, | |
| "loss": 0.0, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.1415, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.585e-07, | |
| "loss": 0.0, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.142, | |
| "grad_norm": 562.8270263671875, | |
| "learning_rate": 8.58e-07, | |
| "loss": 0.0, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.1425, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.575e-07, | |
| "loss": 0.0, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.143, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.569999999999999e-07, | |
| "loss": 0.0, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.1435, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.565e-07, | |
| "loss": 0.0, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.559999999999999e-07, | |
| "loss": 0.0, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.1445, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.555e-07, | |
| "loss": 0.0, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.145, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.55e-07, | |
| "loss": 0.0, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.1455, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.545e-07, | |
| "loss": 0.0, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.146, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.539999999999999e-07, | |
| "loss": 0.0, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.1465, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.535e-07, | |
| "loss": 0.0, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.147, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.529999999999999e-07, | |
| "loss": 0.0, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.1475, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.525e-07, | |
| "loss": 0.0, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.148, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.52e-07, | |
| "loss": 0.0, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.1485, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.515e-07, | |
| "loss": 0.0, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.149, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.51e-07, | |
| "loss": 0.0, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.1495, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.504999999999999e-07, | |
| "loss": 0.0, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.499999999999999e-07, | |
| "loss": 0.0, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.1505, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.495e-07, | |
| "loss": 0.0, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.151, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.489999999999999e-07, | |
| "loss": 0.0, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.1515, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.485e-07, | |
| "loss": 0.0, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.152, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.48e-07, | |
| "loss": 0.0, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.1525, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.475e-07, | |
| "loss": 0.0, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.153, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.469999999999999e-07, | |
| "loss": 0.0, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.1535, | |
| "grad_norm": 53.436363220214844, | |
| "learning_rate": 8.465e-07, | |
| "loss": 0.0, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.154, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.459999999999999e-07, | |
| "loss": 0.0, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.1545, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.455e-07, | |
| "loss": 0.0, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.155, | |
| "grad_norm": 45.34641647338867, | |
| "learning_rate": 8.45e-07, | |
| "loss": -0.0, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.1555, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.445e-07, | |
| "loss": 0.0, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.156, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.439999999999999e-07, | |
| "loss": 0.0, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.1565, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.435e-07, | |
| "loss": 0.0, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.157, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.429999999999999e-07, | |
| "loss": 0.0, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.1575, | |
| "grad_norm": 207.4761962890625, | |
| "learning_rate": 8.425e-07, | |
| "loss": -0.0, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.158, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.419999999999999e-07, | |
| "loss": 0.0, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.1585, | |
| "grad_norm": 49.840850830078125, | |
| "learning_rate": 8.415e-07, | |
| "loss": -0.0, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.159, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.41e-07, | |
| "loss": 0.0, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.1595, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.404999999999999e-07, | |
| "loss": 0.0, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.399999999999999e-07, | |
| "loss": 0.0, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.1605, | |
| "grad_norm": 42.99878692626953, | |
| "learning_rate": 8.395e-07, | |
| "loss": -0.0, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.161, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.389999999999999e-07, | |
| "loss": 0.0, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.1615, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.385e-07, | |
| "loss": 0.0, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.162, | |
| "grad_norm": 26.691635131835938, | |
| "learning_rate": 8.38e-07, | |
| "loss": 0.0, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.1625, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.375e-07, | |
| "loss": 0.0, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.163, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.369999999999999e-07, | |
| "loss": 0.0, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.1635, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.365e-07, | |
| "loss": 0.0, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.164, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.359999999999999e-07, | |
| "loss": 0.0, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.1645, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.355e-07, | |
| "loss": 0.0, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.165, | |
| "grad_norm": 78.05026245117188, | |
| "learning_rate": 8.349999999999999e-07, | |
| "loss": -0.0, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.1655, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.345e-07, | |
| "loss": 0.0, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.166, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.34e-07, | |
| "loss": 0.0, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.1665, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.334999999999999e-07, | |
| "loss": 0.0, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.167, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.329999999999999e-07, | |
| "loss": 0.0, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.1675, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.325e-07, | |
| "loss": 0.0, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.168, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.319999999999999e-07, | |
| "loss": 0.0, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.1685, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.315e-07, | |
| "loss": 0.0, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.169, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.31e-07, | |
| "loss": 0.0, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.1695, | |
| "grad_norm": 54.89845657348633, | |
| "learning_rate": 8.304999999999999e-07, | |
| "loss": -0.0, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.299999999999999e-07, | |
| "loss": 0.0, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1705, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.295e-07, | |
| "loss": 0.0, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.171, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.289999999999999e-07, | |
| "loss": 0.0, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.1715, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.285e-07, | |
| "loss": 0.0, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.172, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.28e-07, | |
| "loss": 0.0, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.1725, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.275e-07, | |
| "loss": 0.0, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.173, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.269999999999999e-07, | |
| "loss": 0.0, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.1735, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.264999999999999e-07, | |
| "loss": 0.0, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.174, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.259999999999999e-07, | |
| "loss": 0.0, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.1745, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.255e-07, | |
| "loss": 0.0, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.175, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.249999999999999e-07, | |
| "loss": 0.0, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.1755, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.245e-07, | |
| "loss": 0.0, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.24e-07, | |
| "loss": 0.0, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.1765, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.234999999999999e-07, | |
| "loss": 0.0, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.177, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.229999999999999e-07, | |
| "loss": 0.0, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.1775, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.225e-07, | |
| "loss": 0.0, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.178, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.219999999999999e-07, | |
| "loss": 0.0, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.1785, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.215e-07, | |
| "loss": 0.0, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.179, | |
| "grad_norm": 95.88402557373047, | |
| "learning_rate": 8.21e-07, | |
| "loss": 0.0, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.1795, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.205e-07, | |
| "loss": 0.0, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.199999999999999e-07, | |
| "loss": 0.0, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.1805, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.194999999999999e-07, | |
| "loss": 0.0, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.181, | |
| "grad_norm": 16.117612838745117, | |
| "learning_rate": 8.189999999999999e-07, | |
| "loss": 0.0, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.1815, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.185e-07, | |
| "loss": 0.0, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.182, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.179999999999999e-07, | |
| "loss": 0.0, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.1825, | |
| "grad_norm": 82.06559753417969, | |
| "learning_rate": 8.175e-07, | |
| "loss": 0.0, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.183, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.169999999999999e-07, | |
| "loss": 0.0, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.1835, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.164999999999999e-07, | |
| "loss": 0.0, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.184, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.159999999999999e-07, | |
| "loss": 0.0, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.1845, | |
| "grad_norm": 134.08810424804688, | |
| "learning_rate": 8.155e-07, | |
| "loss": 0.0, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.185, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.149999999999999e-07, | |
| "loss": 0.0, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.1855, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.145e-07, | |
| "loss": 0.0, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.186, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.14e-07, | |
| "loss": 0.0, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.1865, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.134999999999999e-07, | |
| "loss": 0.0, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.187, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.129999999999999e-07, | |
| "loss": 0.0, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.1875, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.125e-07, | |
| "loss": 0.0, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.188, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.12e-07, | |
| "loss": 0.0, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.1885, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.115e-07, | |
| "loss": 0.0, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.189, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.11e-07, | |
| "loss": 0.0, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.1895, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.105e-07, | |
| "loss": 0.0, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.1e-07, | |
| "loss": 0.0, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.1905, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.094999999999999e-07, | |
| "loss": 0.0, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.191, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.09e-07, | |
| "loss": 0.0, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.1915, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.085e-07, | |
| "loss": 0.0, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.08e-07, | |
| "loss": 0.0, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.1925, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.075e-07, | |
| "loss": 0.0, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.193, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.070000000000001e-07, | |
| "loss": 0.0, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.1935, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.064999999999999e-07, | |
| "loss": 0.0, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.194, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.06e-07, | |
| "loss": 0.0, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.1945, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.055e-07, | |
| "loss": 0.0, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.195, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.05e-07, | |
| "loss": 0.0, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.1955, | |
| "grad_norm": 15.130922317504883, | |
| "learning_rate": 8.045e-07, | |
| "loss": 0.0, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.196, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.04e-07, | |
| "loss": 0.0, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.1965, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.034999999999999e-07, | |
| "loss": 0.0, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.197, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.03e-07, | |
| "loss": 0.0, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.1975, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.024999999999999e-07, | |
| "loss": 0.0, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.198, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.02e-07, | |
| "loss": 0.0, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.1985, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.015e-07, | |
| "loss": 0.0, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.199, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.01e-07, | |
| "loss": 0.0, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.1995, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8.005e-07, | |
| "loss": 0.0, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.0, | |
| "learning_rate": 8e-07, | |
| "loss": 0.0, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2005, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.994999999999999e-07, | |
| "loss": 0.0, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.201, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.99e-07, | |
| "loss": 0.0, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.2015, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.985e-07, | |
| "loss": 0.0, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.202, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.98e-07, | |
| "loss": 0.0, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.2025, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.975e-07, | |
| "loss": 0.0, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.203, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.970000000000001e-07, | |
| "loss": 0.0, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.2035, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.964999999999999e-07, | |
| "loss": 0.0, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.204, | |
| "grad_norm": 139.8319854736328, | |
| "learning_rate": 7.96e-07, | |
| "loss": 0.0, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.2045, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.954999999999999e-07, | |
| "loss": 0.0, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.205, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.95e-07, | |
| "loss": 0.0, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.2055, | |
| "grad_norm": 72.6037368774414, | |
| "learning_rate": 7.945e-07, | |
| "loss": 0.0, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.206, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.94e-07, | |
| "loss": 0.0, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.2065, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.934999999999999e-07, | |
| "loss": 0.0, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.207, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.93e-07, | |
| "loss": 0.0, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.2075, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.924999999999999e-07, | |
| "loss": 0.0, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.92e-07, | |
| "loss": 0.0, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.2085, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.915e-07, | |
| "loss": 0.0, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.209, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.91e-07, | |
| "loss": 0.0, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.2095, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.905e-07, | |
| "loss": 0.0, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.9e-07, | |
| "loss": 0.0, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.2105, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.894999999999999e-07, | |
| "loss": 0.0, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.211, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.89e-07, | |
| "loss": 0.0, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.2115, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.884999999999999e-07, | |
| "loss": 0.0, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.212, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.88e-07, | |
| "loss": 0.0, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.2125, | |
| "grad_norm": 66.85465240478516, | |
| "learning_rate": 7.875e-07, | |
| "loss": 0.0, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.213, | |
| "grad_norm": 108.80921936035156, | |
| "learning_rate": 7.87e-07, | |
| "loss": -0.0, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.2135, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.864999999999999e-07, | |
| "loss": 0.0, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.214, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.86e-07, | |
| "loss": 0.0, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.2145, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.854999999999999e-07, | |
| "loss": 0.0, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.215, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.85e-07, | |
| "loss": 0.0, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.2155, | |
| "grad_norm": 107.53791046142578, | |
| "learning_rate": 7.845e-07, | |
| "loss": -0.0, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.216, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.84e-07, | |
| "loss": 0.0, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.2165, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.834999999999999e-07, | |
| "loss": 0.0, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.217, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.83e-07, | |
| "loss": 0.0, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.2175, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.824999999999999e-07, | |
| "loss": 0.0, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.218, | |
| "grad_norm": 184.61976623535156, | |
| "learning_rate": 7.82e-07, | |
| "loss": 0.0, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.2185, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.815e-07, | |
| "loss": 0.0, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.219, | |
| "grad_norm": 73.76115417480469, | |
| "learning_rate": 7.81e-07, | |
| "loss": 0.0, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.2195, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.805e-07, | |
| "loss": 0.0, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.799999999999999e-07, | |
| "loss": 0.0, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.2205, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.794999999999999e-07, | |
| "loss": 0.0, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.221, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.79e-07, | |
| "loss": 0.0, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.2215, | |
| "grad_norm": 82.87494659423828, | |
| "learning_rate": 7.784999999999999e-07, | |
| "loss": 0.0, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.222, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.78e-07, | |
| "loss": 0.0, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.2225, | |
| "grad_norm": 126.44339752197266, | |
| "learning_rate": 7.775e-07, | |
| "loss": -0.0, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.223, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.77e-07, | |
| "loss": 0.0, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.2235, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.764999999999999e-07, | |
| "loss": 0.0, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.76e-07, | |
| "loss": 0.0, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.2245, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.754999999999999e-07, | |
| "loss": 0.0, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.225, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.75e-07, | |
| "loss": 0.0, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2255, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.745e-07, | |
| "loss": 0.0, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.226, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.74e-07, | |
| "loss": 0.0, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.2265, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.734999999999999e-07, | |
| "loss": 0.0, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.227, | |
| "grad_norm": 37.326351165771484, | |
| "learning_rate": 7.729999999999999e-07, | |
| "loss": 0.0, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.2275, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.724999999999999e-07, | |
| "loss": 0.0, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.228, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.72e-07, | |
| "loss": 0.0, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.2285, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.714999999999999e-07, | |
| "loss": 0.0, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.229, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.71e-07, | |
| "loss": 0.0, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.2295, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.705e-07, | |
| "loss": 0.0, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.699999999999999e-07, | |
| "loss": 0.0, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.2305, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.694999999999999e-07, | |
| "loss": 0.0, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.231, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.69e-07, | |
| "loss": 0.0, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.2315, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.684999999999999e-07, | |
| "loss": 0.0, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.232, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.68e-07, | |
| "loss": 0.0, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.2325, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.675e-07, | |
| "loss": 0.0, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.233, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.67e-07, | |
| "loss": 0.0, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.2335, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.664999999999999e-07, | |
| "loss": 0.0, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.234, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.66e-07, | |
| "loss": 0.0, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.2345, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.654999999999999e-07, | |
| "loss": 0.0, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.235, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.65e-07, | |
| "loss": 0.0, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.2355, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.644999999999999e-07, | |
| "loss": 0.0, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.236, | |
| "grad_norm": 67.02527618408203, | |
| "learning_rate": 7.64e-07, | |
| "loss": -0.0, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.2365, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.635e-07, | |
| "loss": 0.0, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.237, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.629999999999999e-07, | |
| "loss": 0.0, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.2375, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.624999999999999e-07, | |
| "loss": 0.0, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.238, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.62e-07, | |
| "loss": 0.0, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.2385, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.614999999999999e-07, | |
| "loss": 0.0, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.239, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.61e-07, | |
| "loss": 0.0, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.2395, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.605e-07, | |
| "loss": 0.0, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.599999999999999e-07, | |
| "loss": 0.0, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2405, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.594999999999999e-07, | |
| "loss": 0.0, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.241, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.59e-07, | |
| "loss": 0.0, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.2415, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.584999999999999e-07, | |
| "loss": 0.0, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.242, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.58e-07, | |
| "loss": 0.0, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.2425, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.575e-07, | |
| "loss": 0.0, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.243, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.57e-07, | |
| "loss": 0.0, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.2435, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.564999999999999e-07, | |
| "loss": 0.0, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.244, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.559999999999999e-07, | |
| "loss": 0.0, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.2445, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.554999999999999e-07, | |
| "loss": 0.0, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.245, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.55e-07, | |
| "loss": 0.0, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.2455, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.544999999999999e-07, | |
| "loss": 0.0, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.246, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.54e-07, | |
| "loss": 0.0, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.2465, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.535e-07, | |
| "loss": 0.0, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.247, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.529999999999999e-07, | |
| "loss": 0.0, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.2475, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.524999999999999e-07, | |
| "loss": 0.0, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.248, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.52e-07, | |
| "loss": 0.0, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.2485, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.514999999999999e-07, | |
| "loss": 0.0, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.249, | |
| "grad_norm": 59.718631744384766, | |
| "learning_rate": 7.51e-07, | |
| "loss": 0.0, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.2495, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.505e-07, | |
| "loss": 0.0, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.0, | |
| "learning_rate": 7.5e-07, | |
| "loss": 0.0, | |
| "step": 500 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 2000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |