| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 950, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005263157894736842, | |
| "grad_norm": 7.792475124460211, | |
| "learning_rate": 8.421052631578948e-07, | |
| "loss": 1.844, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.010526315789473684, | |
| "grad_norm": 7.77107258632735, | |
| "learning_rate": 1.6842105263157895e-06, | |
| "loss": 1.8342, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.015789473684210527, | |
| "grad_norm": 7.734735147966475, | |
| "learning_rate": 2.5263157894736844e-06, | |
| "loss": 1.8324, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.021052631578947368, | |
| "grad_norm": 7.248228363662717, | |
| "learning_rate": 3.368421052631579e-06, | |
| "loss": 1.816, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.02631578947368421, | |
| "grad_norm": 5.778539169912221, | |
| "learning_rate": 4.210526315789474e-06, | |
| "loss": 1.7849, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.031578947368421054, | |
| "grad_norm": 3.239108511490633, | |
| "learning_rate": 5.052631578947369e-06, | |
| "loss": 1.7388, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.03684210526315789, | |
| "grad_norm": 2.659157940796987, | |
| "learning_rate": 5.8947368421052634e-06, | |
| "loss": 1.7195, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.042105263157894736, | |
| "grad_norm": 5.788438535248494, | |
| "learning_rate": 6.736842105263158e-06, | |
| "loss": 1.716, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.04736842105263158, | |
| "grad_norm": 5.78966752434045, | |
| "learning_rate": 7.578947368421054e-06, | |
| "loss": 1.7132, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.05263157894736842, | |
| "grad_norm": 5.662263812934936, | |
| "learning_rate": 8.421052631578948e-06, | |
| "loss": 1.7155, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05789473684210526, | |
| "grad_norm": 4.3906745364224955, | |
| "learning_rate": 9.263157894736842e-06, | |
| "loss": 1.6617, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.06315789473684211, | |
| "grad_norm": 3.7326640381933247, | |
| "learning_rate": 1.0105263157894738e-05, | |
| "loss": 1.6634, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.06842105263157895, | |
| "grad_norm": 2.669371108068819, | |
| "learning_rate": 1.0947368421052633e-05, | |
| "loss": 1.6463, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.07368421052631578, | |
| "grad_norm": 2.080061860580032, | |
| "learning_rate": 1.1789473684210527e-05, | |
| "loss": 1.5925, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.07894736842105263, | |
| "grad_norm": 2.245619823013294, | |
| "learning_rate": 1.263157894736842e-05, | |
| "loss": 1.5797, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.08421052631578947, | |
| "grad_norm": 2.0053863294690806, | |
| "learning_rate": 1.3473684210526316e-05, | |
| "loss": 1.5613, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.08947368421052632, | |
| "grad_norm": 1.9625639332300135, | |
| "learning_rate": 1.4315789473684212e-05, | |
| "loss": 1.5638, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.09473684210526316, | |
| "grad_norm": 1.5893875551469467, | |
| "learning_rate": 1.5157894736842107e-05, | |
| "loss": 1.5522, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 1.5906140921890974, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 1.5209, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.10526315789473684, | |
| "grad_norm": 1.559169647702599, | |
| "learning_rate": 1.6842105263157896e-05, | |
| "loss": 1.4911, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.11052631578947368, | |
| "grad_norm": 1.2982321447862488, | |
| "learning_rate": 1.768421052631579e-05, | |
| "loss": 1.4991, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.11578947368421053, | |
| "grad_norm": 1.3143336518371307, | |
| "learning_rate": 1.8526315789473684e-05, | |
| "loss": 1.5012, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.12105263157894737, | |
| "grad_norm": 1.279000313318411, | |
| "learning_rate": 1.936842105263158e-05, | |
| "loss": 1.5011, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.12631578947368421, | |
| "grad_norm": 0.922398830661329, | |
| "learning_rate": 2.0210526315789475e-05, | |
| "loss": 1.4624, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.13157894736842105, | |
| "grad_norm": 1.3274580990016782, | |
| "learning_rate": 2.105263157894737e-05, | |
| "loss": 1.4645, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.1368421052631579, | |
| "grad_norm": 1.1341340679878056, | |
| "learning_rate": 2.1894736842105266e-05, | |
| "loss": 1.4715, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.14210526315789473, | |
| "grad_norm": 1.2628910185228979, | |
| "learning_rate": 2.273684210526316e-05, | |
| "loss": 1.4584, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.14736842105263157, | |
| "grad_norm": 1.57433223809749, | |
| "learning_rate": 2.3578947368421054e-05, | |
| "loss": 1.448, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.15263157894736842, | |
| "grad_norm": 0.8438420059614518, | |
| "learning_rate": 2.442105263157895e-05, | |
| "loss": 1.4442, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.15789473684210525, | |
| "grad_norm": 1.280060736418443, | |
| "learning_rate": 2.526315789473684e-05, | |
| "loss": 1.4577, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.1631578947368421, | |
| "grad_norm": 1.8388712899282178, | |
| "learning_rate": 2.610526315789474e-05, | |
| "loss": 1.4354, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.16842105263157894, | |
| "grad_norm": 1.1283525934214154, | |
| "learning_rate": 2.6947368421052632e-05, | |
| "loss": 1.4458, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.1736842105263158, | |
| "grad_norm": 1.559904631956879, | |
| "learning_rate": 2.778947368421053e-05, | |
| "loss": 1.4337, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.17894736842105263, | |
| "grad_norm": 1.433428530147804, | |
| "learning_rate": 2.8631578947368423e-05, | |
| "loss": 1.4311, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.18421052631578946, | |
| "grad_norm": 1.4475127262626666, | |
| "learning_rate": 2.9473684210526317e-05, | |
| "loss": 1.4296, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.18947368421052632, | |
| "grad_norm": 1.6258635588736965, | |
| "learning_rate": 3.0315789473684214e-05, | |
| "loss": 1.403, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.19473684210526315, | |
| "grad_norm": 0.9140879296838869, | |
| "learning_rate": 3.1157894736842105e-05, | |
| "loss": 1.4149, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 1.355462077600805, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 1.4241, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.20526315789473684, | |
| "grad_norm": 2.025270623377536, | |
| "learning_rate": 3.28421052631579e-05, | |
| "loss": 1.4269, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "grad_norm": 1.5911005582582893, | |
| "learning_rate": 3.368421052631579e-05, | |
| "loss": 1.4261, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.21578947368421053, | |
| "grad_norm": 1.7736374990816877, | |
| "learning_rate": 3.452631578947369e-05, | |
| "loss": 1.4248, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.22105263157894736, | |
| "grad_norm": 2.020103172778917, | |
| "learning_rate": 3.536842105263158e-05, | |
| "loss": 1.4019, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.22631578947368422, | |
| "grad_norm": 2.1032904325246693, | |
| "learning_rate": 3.621052631578948e-05, | |
| "loss": 1.4138, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.23157894736842105, | |
| "grad_norm": 2.034839240989353, | |
| "learning_rate": 3.705263157894737e-05, | |
| "loss": 1.4099, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.23684210526315788, | |
| "grad_norm": 1.5310564952941104, | |
| "learning_rate": 3.789473684210526e-05, | |
| "loss": 1.419, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.24210526315789474, | |
| "grad_norm": 2.3582192444588594, | |
| "learning_rate": 3.873684210526316e-05, | |
| "loss": 1.3989, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.24736842105263157, | |
| "grad_norm": 1.2404229618115798, | |
| "learning_rate": 3.9578947368421056e-05, | |
| "loss": 1.4034, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.25263157894736843, | |
| "grad_norm": 2.631335015977353, | |
| "learning_rate": 4.042105263157895e-05, | |
| "loss": 1.4067, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.2578947368421053, | |
| "grad_norm": 1.7741169104229972, | |
| "learning_rate": 4.126315789473685e-05, | |
| "loss": 1.3842, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.2631578947368421, | |
| "grad_norm": 2.7958288897467813, | |
| "learning_rate": 4.210526315789474e-05, | |
| "loss": 1.4165, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.26842105263157895, | |
| "grad_norm": 2.106018978323054, | |
| "learning_rate": 4.294736842105264e-05, | |
| "loss": 1.408, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.2736842105263158, | |
| "grad_norm": 2.1854136752650284, | |
| "learning_rate": 4.378947368421053e-05, | |
| "loss": 1.4039, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.2789473684210526, | |
| "grad_norm": 2.1320541034792897, | |
| "learning_rate": 4.463157894736842e-05, | |
| "loss": 1.3792, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.28421052631578947, | |
| "grad_norm": 2.1929241850886183, | |
| "learning_rate": 4.547368421052632e-05, | |
| "loss": 1.405, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.2894736842105263, | |
| "grad_norm": 2.4083834163686406, | |
| "learning_rate": 4.6315789473684214e-05, | |
| "loss": 1.397, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.29473684210526313, | |
| "grad_norm": 1.8900388617787558, | |
| "learning_rate": 4.715789473684211e-05, | |
| "loss": 1.398, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 1.8869758461876107, | |
| "learning_rate": 4.8e-05, | |
| "loss": 1.399, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.30526315789473685, | |
| "grad_norm": 1.9387939885602292, | |
| "learning_rate": 4.88421052631579e-05, | |
| "loss": 1.4029, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.3105263157894737, | |
| "grad_norm": 2.8691231917123, | |
| "learning_rate": 4.9684210526315796e-05, | |
| "loss": 1.3774, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.3157894736842105, | |
| "grad_norm": 2.023114295041414, | |
| "learning_rate": 5.052631578947368e-05, | |
| "loss": 1.3925, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.32105263157894737, | |
| "grad_norm": 1.8266981690923911, | |
| "learning_rate": 5.136842105263158e-05, | |
| "loss": 1.3752, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.3263157894736842, | |
| "grad_norm": 2.749570229085121, | |
| "learning_rate": 5.221052631578948e-05, | |
| "loss": 1.3846, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.33157894736842103, | |
| "grad_norm": 1.577733284649954, | |
| "learning_rate": 5.305263157894737e-05, | |
| "loss": 1.3983, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.3368421052631579, | |
| "grad_norm": 3.4308320005068897, | |
| "learning_rate": 5.3894736842105265e-05, | |
| "loss": 1.3898, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.34210526315789475, | |
| "grad_norm": 2.7293556039435583, | |
| "learning_rate": 5.4736842105263165e-05, | |
| "loss": 1.3801, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.3473684210526316, | |
| "grad_norm": 2.7986318090963036, | |
| "learning_rate": 5.557894736842106e-05, | |
| "loss": 1.3929, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.3526315789473684, | |
| "grad_norm": 2.5730126104338407, | |
| "learning_rate": 5.642105263157895e-05, | |
| "loss": 1.382, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.35789473684210527, | |
| "grad_norm": 2.467033189056739, | |
| "learning_rate": 5.726315789473685e-05, | |
| "loss": 1.3738, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.3631578947368421, | |
| "grad_norm": 1.5266025457633567, | |
| "learning_rate": 5.810526315789475e-05, | |
| "loss": 1.394, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.3684210526315789, | |
| "grad_norm": 2.1446942030427567, | |
| "learning_rate": 5.8947368421052634e-05, | |
| "loss": 1.3845, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.3736842105263158, | |
| "grad_norm": 2.3417890559923986, | |
| "learning_rate": 5.978947368421053e-05, | |
| "loss": 1.3841, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.37894736842105264, | |
| "grad_norm": 1.9604854202915063, | |
| "learning_rate": 6.063157894736843e-05, | |
| "loss": 1.3834, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.38421052631578945, | |
| "grad_norm": 2.4507109733612578, | |
| "learning_rate": 6.147368421052632e-05, | |
| "loss": 1.4071, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.3894736842105263, | |
| "grad_norm": 3.5995720583445063, | |
| "learning_rate": 6.231578947368421e-05, | |
| "loss": 1.3704, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.39473684210526316, | |
| "grad_norm": 2.0182422302151797, | |
| "learning_rate": 6.315789473684212e-05, | |
| "loss": 1.379, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 4.1134652051924, | |
| "learning_rate": 6.400000000000001e-05, | |
| "loss": 1.3788, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.4052631578947368, | |
| "grad_norm": 2.929511327190635, | |
| "learning_rate": 6.484210526315789e-05, | |
| "loss": 1.3914, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.4105263157894737, | |
| "grad_norm": 3.5649183111031406, | |
| "learning_rate": 6.56842105263158e-05, | |
| "loss": 1.3715, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.41578947368421054, | |
| "grad_norm": 3.365859410878661, | |
| "learning_rate": 6.652631578947369e-05, | |
| "loss": 1.3986, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "grad_norm": 3.304005403851428, | |
| "learning_rate": 6.736842105263159e-05, | |
| "loss": 1.3783, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.4263157894736842, | |
| "grad_norm": 2.894847638309193, | |
| "learning_rate": 6.821052631578948e-05, | |
| "loss": 1.3802, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.43157894736842106, | |
| "grad_norm": 2.698808992917365, | |
| "learning_rate": 6.905263157894737e-05, | |
| "loss": 1.3761, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.4368421052631579, | |
| "grad_norm": 2.163408526778829, | |
| "learning_rate": 6.989473684210527e-05, | |
| "loss": 1.3911, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.4421052631578947, | |
| "grad_norm": 3.2099232827707014, | |
| "learning_rate": 7.073684210526316e-05, | |
| "loss": 1.3926, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.4473684210526316, | |
| "grad_norm": 2.475621296000252, | |
| "learning_rate": 7.157894736842105e-05, | |
| "loss": 1.3932, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.45263157894736844, | |
| "grad_norm": 2.9203370802467936, | |
| "learning_rate": 7.242105263157896e-05, | |
| "loss": 1.3822, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.45789473684210524, | |
| "grad_norm": 2.3862423279450393, | |
| "learning_rate": 7.326315789473684e-05, | |
| "loss": 1.3721, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.4631578947368421, | |
| "grad_norm": 3.275399501836966, | |
| "learning_rate": 7.410526315789474e-05, | |
| "loss": 1.4002, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.46842105263157896, | |
| "grad_norm": 2.5587038178412533, | |
| "learning_rate": 7.494736842105264e-05, | |
| "loss": 1.3812, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.47368421052631576, | |
| "grad_norm": 3.0292582788342375, | |
| "learning_rate": 7.578947368421052e-05, | |
| "loss": 1.3679, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.4789473684210526, | |
| "grad_norm": 2.4725567771769366, | |
| "learning_rate": 7.663157894736843e-05, | |
| "loss": 1.3825, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.4842105263157895, | |
| "grad_norm": 2.983972110527992, | |
| "learning_rate": 7.747368421052633e-05, | |
| "loss": 1.3727, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.48947368421052634, | |
| "grad_norm": 2.498982942091222, | |
| "learning_rate": 7.831578947368422e-05, | |
| "loss": 1.3769, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.49473684210526314, | |
| "grad_norm": 3.526109045872143, | |
| "learning_rate": 7.915789473684211e-05, | |
| "loss": 1.3788, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 3.1483584936822284, | |
| "learning_rate": 8e-05, | |
| "loss": 1.3643, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.5052631578947369, | |
| "grad_norm": 2.0080980217269517, | |
| "learning_rate": 7.999972997932227e-05, | |
| "loss": 1.3846, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.5105263157894737, | |
| "grad_norm": 4.353229694894079, | |
| "learning_rate": 7.999891992093464e-05, | |
| "loss": 1.3787, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.5157894736842106, | |
| "grad_norm": 2.878217785277169, | |
| "learning_rate": 7.999756983577373e-05, | |
| "loss": 1.3695, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.5210526315789473, | |
| "grad_norm": 1.962122819070823, | |
| "learning_rate": 7.999567974206707e-05, | |
| "loss": 1.364, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.5263157894736842, | |
| "grad_norm": 3.9822305254995887, | |
| "learning_rate": 7.999324966533291e-05, | |
| "loss": 1.3928, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.531578947368421, | |
| "grad_norm": 2.4880539516369713, | |
| "learning_rate": 7.999027963837979e-05, | |
| "loss": 1.3656, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.5368421052631579, | |
| "grad_norm": 4.910309768750227, | |
| "learning_rate": 7.998676970130614e-05, | |
| "loss": 1.3802, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.5421052631578948, | |
| "grad_norm": 2.905093024244942, | |
| "learning_rate": 7.998271990149972e-05, | |
| "loss": 1.3731, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.5473684210526316, | |
| "grad_norm": 3.8609393179697284, | |
| "learning_rate": 7.997813029363704e-05, | |
| "loss": 1.4037, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.5526315789473685, | |
| "grad_norm": 3.6672144152714865, | |
| "learning_rate": 7.997300093968255e-05, | |
| "loss": 1.3739, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.5578947368421052, | |
| "grad_norm": 3.0897791527691, | |
| "learning_rate": 7.996733190888783e-05, | |
| "loss": 1.3729, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.5631578947368421, | |
| "grad_norm": 3.0709819435052794, | |
| "learning_rate": 7.996112327779065e-05, | |
| "loss": 1.3735, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.5684210526315789, | |
| "grad_norm": 2.4110812616502053, | |
| "learning_rate": 7.995437513021393e-05, | |
| "loss": 1.3625, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.5736842105263158, | |
| "grad_norm": 3.20735512285491, | |
| "learning_rate": 7.994708755726469e-05, | |
| "loss": 1.3646, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.5789473684210527, | |
| "grad_norm": 3.410126968058674, | |
| "learning_rate": 7.993926065733265e-05, | |
| "loss": 1.3828, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.5842105263157895, | |
| "grad_norm": 1.9981274214741556, | |
| "learning_rate": 7.993089453608908e-05, | |
| "loss": 1.3614, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.5894736842105263, | |
| "grad_norm": 3.8348881514308104, | |
| "learning_rate": 7.992198930648527e-05, | |
| "loss": 1.366, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.5947368421052631, | |
| "grad_norm": 3.6103553587856188, | |
| "learning_rate": 7.991254508875098e-05, | |
| "loss": 1.3797, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 2.545996054998508, | |
| "learning_rate": 7.990256201039297e-05, | |
| "loss": 1.37, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.6052631578947368, | |
| "grad_norm": 2.9285662609146597, | |
| "learning_rate": 7.98920402061931e-05, | |
| "loss": 1.3691, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.6105263157894737, | |
| "grad_norm": 3.497818597459857, | |
| "learning_rate": 7.988097981820659e-05, | |
| "loss": 1.3724, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.6157894736842106, | |
| "grad_norm": 1.955436154771224, | |
| "learning_rate": 7.986938099576015e-05, | |
| "loss": 1.3553, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.6210526315789474, | |
| "grad_norm": 2.024911219916847, | |
| "learning_rate": 7.985724389544982e-05, | |
| "loss": 1.3736, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.6263157894736842, | |
| "grad_norm": 2.726230215863742, | |
| "learning_rate": 7.984456868113905e-05, | |
| "loss": 1.3666, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.631578947368421, | |
| "grad_norm": 1.8902008783219175, | |
| "learning_rate": 7.98313555239563e-05, | |
| "loss": 1.358, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.6368421052631579, | |
| "grad_norm": 3.3343114876725215, | |
| "learning_rate": 7.98176046022929e-05, | |
| "loss": 1.3674, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.6421052631578947, | |
| "grad_norm": 2.8760888936249716, | |
| "learning_rate": 7.980331610180046e-05, | |
| "loss": 1.3598, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.6473684210526316, | |
| "grad_norm": 1.821327982649168, | |
| "learning_rate": 7.978849021538855e-05, | |
| "loss": 1.3559, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.6526315789473685, | |
| "grad_norm": 2.1330475395816038, | |
| "learning_rate": 7.977312714322193e-05, | |
| "loss": 1.3529, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.6578947368421053, | |
| "grad_norm": 2.6018688330295032, | |
| "learning_rate": 7.975722709271799e-05, | |
| "loss": 1.3537, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.6631578947368421, | |
| "grad_norm": 2.8283770858028143, | |
| "learning_rate": 7.974079027854382e-05, | |
| "loss": 1.3591, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.6684210526315789, | |
| "grad_norm": 2.0562890332720625, | |
| "learning_rate": 7.972381692261343e-05, | |
| "loss": 1.3523, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.6736842105263158, | |
| "grad_norm": 2.677519447523674, | |
| "learning_rate": 7.970630725408467e-05, | |
| "loss": 1.3588, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.6789473684210526, | |
| "grad_norm": 3.39520049259372, | |
| "learning_rate": 7.968826150935615e-05, | |
| "loss": 1.357, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.6842105263157895, | |
| "grad_norm": 1.326798562942608, | |
| "learning_rate": 7.96696799320641e-05, | |
| "loss": 1.3547, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.6894736842105263, | |
| "grad_norm": 5.713227145762471, | |
| "learning_rate": 7.965056277307902e-05, | |
| "loss": 1.405, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.6947368421052632, | |
| "grad_norm": 4.708892369834835, | |
| "learning_rate": 7.963091029050231e-05, | |
| "loss": 1.4096, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 3.526071821361426, | |
| "learning_rate": 7.961072274966282e-05, | |
| "loss": 1.3766, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.7052631578947368, | |
| "grad_norm": 3.421098464992638, | |
| "learning_rate": 7.95900004231132e-05, | |
| "loss": 1.372, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.7105263157894737, | |
| "grad_norm": 3.1868582792498468, | |
| "learning_rate": 7.956874359062632e-05, | |
| "loss": 1.3742, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.7157894736842105, | |
| "grad_norm": 2.5369692116526354, | |
| "learning_rate": 7.954695253919138e-05, | |
| "loss": 1.38, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.7210526315789474, | |
| "grad_norm": 4.029612203074803, | |
| "learning_rate": 7.952462756301007e-05, | |
| "loss": 1.3789, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.7263157894736842, | |
| "grad_norm": 3.071634072769698, | |
| "learning_rate": 7.95017689634927e-05, | |
| "loss": 1.3692, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.7315789473684211, | |
| "grad_norm": 3.38738695405503, | |
| "learning_rate": 7.947837704925396e-05, | |
| "loss": 1.3692, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.7368421052631579, | |
| "grad_norm": 2.900109575705123, | |
| "learning_rate": 7.94544521361089e-05, | |
| "loss": 1.3851, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.7421052631578947, | |
| "grad_norm": 2.7866463709429903, | |
| "learning_rate": 7.942999454706858e-05, | |
| "loss": 1.3797, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.7473684210526316, | |
| "grad_norm": 2.053753694637562, | |
| "learning_rate": 7.940500461233572e-05, | |
| "loss": 1.3697, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.7526315789473684, | |
| "grad_norm": 2.9697850807629464, | |
| "learning_rate": 7.93794826693003e-05, | |
| "loss": 1.349, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.7578947368421053, | |
| "grad_norm": 2.370919157592963, | |
| "learning_rate": 7.935342906253492e-05, | |
| "loss": 1.3556, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.7631578947368421, | |
| "grad_norm": 3.3074201358634125, | |
| "learning_rate": 7.932684414379021e-05, | |
| "loss": 1.3656, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.7684210526315789, | |
| "grad_norm": 2.2648377858473605, | |
| "learning_rate": 7.929972827199006e-05, | |
| "loss": 1.3704, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.7736842105263158, | |
| "grad_norm": 2.835496161570207, | |
| "learning_rate": 7.927208181322679e-05, | |
| "loss": 1.3466, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.7789473684210526, | |
| "grad_norm": 2.343363991518181, | |
| "learning_rate": 7.924390514075616e-05, | |
| "loss": 1.3726, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.7842105263157895, | |
| "grad_norm": 2.7560515706025455, | |
| "learning_rate": 7.921519863499239e-05, | |
| "loss": 1.3626, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.7894736842105263, | |
| "grad_norm": 2.4007554609918564, | |
| "learning_rate": 7.918596268350296e-05, | |
| "loss": 1.3587, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.7947368421052632, | |
| "grad_norm": 2.5965741897469896, | |
| "learning_rate": 7.915619768100348e-05, | |
| "loss": 1.3813, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 2.0691402568946606, | |
| "learning_rate": 7.912590402935223e-05, | |
| "loss": 1.3466, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.8052631578947368, | |
| "grad_norm": 2.682134570279511, | |
| "learning_rate": 7.909508213754484e-05, | |
| "loss": 1.3484, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.8105263157894737, | |
| "grad_norm": 2.961951846828786, | |
| "learning_rate": 7.906373242170872e-05, | |
| "loss": 1.356, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.8157894736842105, | |
| "grad_norm": 1.647563803925644, | |
| "learning_rate": 7.903185530509743e-05, | |
| "loss": 1.3314, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.8210526315789474, | |
| "grad_norm": 1.7184813388266553, | |
| "learning_rate": 7.899945121808501e-05, | |
| "loss": 1.3521, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.8263157894736842, | |
| "grad_norm": 2.795936841854527, | |
| "learning_rate": 7.896652059816015e-05, | |
| "loss": 1.3635, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.8315789473684211, | |
| "grad_norm": 3.0399857654385034, | |
| "learning_rate": 7.893306388992023e-05, | |
| "loss": 1.3619, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.8368421052631579, | |
| "grad_norm": 1.5156478386100931, | |
| "learning_rate": 7.889908154506545e-05, | |
| "loss": 1.332, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.8421052631578947, | |
| "grad_norm": 2.299123403411113, | |
| "learning_rate": 7.886457402239256e-05, | |
| "loss": 1.351, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.8473684210526315, | |
| "grad_norm": 2.307814996765452, | |
| "learning_rate": 7.88295417877888e-05, | |
| "loss": 1.3565, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.8526315789473684, | |
| "grad_norm": 3.2987572063667643, | |
| "learning_rate": 7.879398531422558e-05, | |
| "loss": 1.3719, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.8578947368421053, | |
| "grad_norm": 1.845792873065677, | |
| "learning_rate": 7.875790508175202e-05, | |
| "loss": 1.3384, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.8631578947368421, | |
| "grad_norm": 2.4609311250378942, | |
| "learning_rate": 7.87213015774886e-05, | |
| "loss": 1.3633, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.868421052631579, | |
| "grad_norm": 2.693256637820666, | |
| "learning_rate": 7.868417529562043e-05, | |
| "loss": 1.3639, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.8736842105263158, | |
| "grad_norm": 1.5086514518899445, | |
| "learning_rate": 7.864652673739073e-05, | |
| "loss": 1.3615, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.8789473684210526, | |
| "grad_norm": 3.1066465037527147, | |
| "learning_rate": 7.860835641109395e-05, | |
| "loss": 1.3507, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.8842105263157894, | |
| "grad_norm": 2.327552820376472, | |
| "learning_rate": 7.856966483206897e-05, | |
| "loss": 1.3458, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.8894736842105263, | |
| "grad_norm": 2.808477876459289, | |
| "learning_rate": 7.853045252269208e-05, | |
| "loss": 1.3601, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.8947368421052632, | |
| "grad_norm": 2.544696040692953, | |
| "learning_rate": 7.849072001237001e-05, | |
| "loss": 1.3529, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 2.9946344095632575, | |
| "learning_rate": 7.845046783753276e-05, | |
| "loss": 1.3612, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.9052631578947369, | |
| "grad_norm": 2.1670615109125646, | |
| "learning_rate": 7.840969654162627e-05, | |
| "loss": 1.3403, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.9105263157894737, | |
| "grad_norm": 2.4274632796911324, | |
| "learning_rate": 7.83684066751052e-05, | |
| "loss": 1.3492, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.9157894736842105, | |
| "grad_norm": 2.2078510708150416, | |
| "learning_rate": 7.832659879542544e-05, | |
| "loss": 1.3322, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.9210526315789473, | |
| "grad_norm": 2.834266147883312, | |
| "learning_rate": 7.828427346703657e-05, | |
| "loss": 1.3658, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.9263157894736842, | |
| "grad_norm": 2.5098411818246156, | |
| "learning_rate": 7.824143126137431e-05, | |
| "loss": 1.3343, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.9315789473684211, | |
| "grad_norm": 1.9856802860400529, | |
| "learning_rate": 7.819807275685272e-05, | |
| "loss": 1.3408, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.9368421052631579, | |
| "grad_norm": 2.4068321545997717, | |
| "learning_rate": 7.815419853885644e-05, | |
| "loss": 1.3482, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.9421052631578948, | |
| "grad_norm": 2.0424531148819507, | |
| "learning_rate": 7.810980919973277e-05, | |
| "loss": 1.3492, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.9473684210526315, | |
| "grad_norm": 2.88855449179049, | |
| "learning_rate": 7.806490533878368e-05, | |
| "loss": 1.3409, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.9526315789473684, | |
| "grad_norm": 2.2025647302444593, | |
| "learning_rate": 7.801948756225772e-05, | |
| "loss": 1.3552, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.9578947368421052, | |
| "grad_norm": 1.705774295065343, | |
| "learning_rate": 7.797355648334185e-05, | |
| "loss": 1.3298, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.9631578947368421, | |
| "grad_norm": 2.0591090166453907, | |
| "learning_rate": 7.792711272215308e-05, | |
| "loss": 1.3234, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.968421052631579, | |
| "grad_norm": 2.543058246007598, | |
| "learning_rate": 7.788015690573025e-05, | |
| "loss": 1.3454, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.9736842105263158, | |
| "grad_norm": 2.7945291673690273, | |
| "learning_rate": 7.783268966802539e-05, | |
| "loss": 1.3623, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.9789473684210527, | |
| "grad_norm": 1.2623547103194999, | |
| "learning_rate": 7.778471164989532e-05, | |
| "loss": 1.3253, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.9842105263157894, | |
| "grad_norm": 2.8671346135920555, | |
| "learning_rate": 7.773622349909285e-05, | |
| "loss": 1.3516, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.9894736842105263, | |
| "grad_norm": 2.37666359514784, | |
| "learning_rate": 7.768722587025818e-05, | |
| "loss": 1.333, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.9947368421052631, | |
| "grad_norm": 2.0642846376852697, | |
| "learning_rate": 7.763771942490995e-05, | |
| "loss": 1.3514, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.7892483033365818, | |
| "learning_rate": 7.758770483143634e-05, | |
| "loss": 1.3383, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.0052631578947369, | |
| "grad_norm": 2.3383926476340875, | |
| "learning_rate": 7.753718276508609e-05, | |
| "loss": 1.3296, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 1.0105263157894737, | |
| "grad_norm": 2.4843645129343686, | |
| "learning_rate": 7.748615390795932e-05, | |
| "loss": 1.3271, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.0157894736842106, | |
| "grad_norm": 2.418044856395934, | |
| "learning_rate": 7.743461894899837e-05, | |
| "loss": 1.3272, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 1.0210526315789474, | |
| "grad_norm": 1.5738686856678197, | |
| "learning_rate": 7.738257858397844e-05, | |
| "loss": 1.3345, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.0263157894736843, | |
| "grad_norm": 2.648547362628862, | |
| "learning_rate": 7.733003351549829e-05, | |
| "loss": 1.3334, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.0315789473684212, | |
| "grad_norm": 1.882461821114559, | |
| "learning_rate": 7.727698445297066e-05, | |
| "loss": 1.3129, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.0368421052631578, | |
| "grad_norm": 2.4800191556167586, | |
| "learning_rate": 7.722343211261274e-05, | |
| "loss": 1.3254, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 1.0421052631578946, | |
| "grad_norm": 2.23843270259424, | |
| "learning_rate": 7.71693772174365e-05, | |
| "loss": 1.3273, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.0473684210526315, | |
| "grad_norm": 1.9793326670930025, | |
| "learning_rate": 7.71148204972389e-05, | |
| "loss": 1.3286, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.0526315789473684, | |
| "grad_norm": 2.1730809011656502, | |
| "learning_rate": 7.705976268859207e-05, | |
| "loss": 1.3245, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.0578947368421052, | |
| "grad_norm": 2.185445170389663, | |
| "learning_rate": 7.700420453483336e-05, | |
| "loss": 1.3222, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.063157894736842, | |
| "grad_norm": 2.3909717751217903, | |
| "learning_rate": 7.694814678605528e-05, | |
| "loss": 1.325, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.068421052631579, | |
| "grad_norm": 1.923075293942803, | |
| "learning_rate": 7.68915901990954e-05, | |
| "loss": 1.3107, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.0736842105263158, | |
| "grad_norm": 1.7281637350526677, | |
| "learning_rate": 7.683453553752611e-05, | |
| "loss": 1.3252, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.0789473684210527, | |
| "grad_norm": 3.2565617242431055, | |
| "learning_rate": 7.677698357164431e-05, | |
| "loss": 1.3269, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.0842105263157895, | |
| "grad_norm": 1.2312572256380077, | |
| "learning_rate": 7.671893507846109e-05, | |
| "loss": 1.3208, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.0894736842105264, | |
| "grad_norm": 2.928938984187919, | |
| "learning_rate": 7.66603908416911e-05, | |
| "loss": 1.3313, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.0947368421052632, | |
| "grad_norm": 2.9307098888263026, | |
| "learning_rate": 7.660135165174205e-05, | |
| "loss": 1.3455, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 1.593997332291012, | |
| "learning_rate": 7.654181830570404e-05, | |
| "loss": 1.3103, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.1052631578947367, | |
| "grad_norm": 2.6269588682109952, | |
| "learning_rate": 7.648179160733883e-05, | |
| "loss": 1.3167, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.1105263157894736, | |
| "grad_norm": 2.4790660783022043, | |
| "learning_rate": 7.642127236706887e-05, | |
| "loss": 1.3164, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.1157894736842104, | |
| "grad_norm": 2.1512908725199544, | |
| "learning_rate": 7.636026140196651e-05, | |
| "loss": 1.3067, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.1210526315789473, | |
| "grad_norm": 1.5786680257092611, | |
| "learning_rate": 7.629875953574282e-05, | |
| "loss": 1.3248, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.1263157894736842, | |
| "grad_norm": 1.8684394631658845, | |
| "learning_rate": 7.623676759873661e-05, | |
| "loss": 1.3356, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.131578947368421, | |
| "grad_norm": 1.83011908562494, | |
| "learning_rate": 7.61742864279031e-05, | |
| "loss": 1.3243, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.1368421052631579, | |
| "grad_norm": 2.5453116567442686, | |
| "learning_rate": 7.611131686680272e-05, | |
| "loss": 1.3202, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.1421052631578947, | |
| "grad_norm": 1.899610653373559, | |
| "learning_rate": 7.604785976558961e-05, | |
| "loss": 1.3196, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.1473684210526316, | |
| "grad_norm": 2.7819319499079143, | |
| "learning_rate": 7.598391598100029e-05, | |
| "loss": 1.3223, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.1526315789473685, | |
| "grad_norm": 2.119997221986913, | |
| "learning_rate": 7.591948637634193e-05, | |
| "loss": 1.3304, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.1578947368421053, | |
| "grad_norm": 2.4856497917141254, | |
| "learning_rate": 7.585457182148081e-05, | |
| "loss": 1.3036, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.1631578947368422, | |
| "grad_norm": 2.1055551992495847, | |
| "learning_rate": 7.578917319283055e-05, | |
| "loss": 1.3269, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.168421052631579, | |
| "grad_norm": 2.2379057425917424, | |
| "learning_rate": 7.572329137334023e-05, | |
| "loss": 1.3084, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.1736842105263159, | |
| "grad_norm": 2.0190272934745055, | |
| "learning_rate": 7.565692725248254e-05, | |
| "loss": 1.3251, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.1789473684210527, | |
| "grad_norm": 1.2213400585349068, | |
| "learning_rate": 7.559008172624174e-05, | |
| "loss": 1.3089, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.1842105263157894, | |
| "grad_norm": 3.4238358307196375, | |
| "learning_rate": 7.552275569710152e-05, | |
| "loss": 1.3188, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.1894736842105262, | |
| "grad_norm": 1.9434637558797097, | |
| "learning_rate": 7.545495007403287e-05, | |
| "loss": 1.3197, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.194736842105263, | |
| "grad_norm": 3.2480882471479164, | |
| "learning_rate": 7.538666577248184e-05, | |
| "loss": 1.3248, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 2.686792838642632, | |
| "learning_rate": 7.531790371435709e-05, | |
| "loss": 1.3166, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.2052631578947368, | |
| "grad_norm": 2.667702689555652, | |
| "learning_rate": 7.524866482801748e-05, | |
| "loss": 1.3118, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.2105263157894737, | |
| "grad_norm": 2.0267106721992003, | |
| "learning_rate": 7.517895004825956e-05, | |
| "loss": 1.3311, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.2157894736842105, | |
| "grad_norm": 3.195120176439168, | |
| "learning_rate": 7.510876031630496e-05, | |
| "loss": 1.322, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.2210526315789474, | |
| "grad_norm": 1.9034234117765794, | |
| "learning_rate": 7.503809657978762e-05, | |
| "loss": 1.3226, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.2263157894736842, | |
| "grad_norm": 3.690905599022198, | |
| "learning_rate": 7.496695979274103e-05, | |
| "loss": 1.3255, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.231578947368421, | |
| "grad_norm": 3.145636629896195, | |
| "learning_rate": 7.489535091558536e-05, | |
| "loss": 1.3381, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.236842105263158, | |
| "grad_norm": 2.5072433312959843, | |
| "learning_rate": 7.48232709151145e-05, | |
| "loss": 1.3219, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.2421052631578948, | |
| "grad_norm": 3.2107352113754186, | |
| "learning_rate": 7.475072076448298e-05, | |
| "loss": 1.3227, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.2473684210526317, | |
| "grad_norm": 1.58975425995912, | |
| "learning_rate": 7.467770144319283e-05, | |
| "loss": 1.3333, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.2526315789473683, | |
| "grad_norm": 3.7587617169131082, | |
| "learning_rate": 7.460421393708039e-05, | |
| "loss": 1.3509, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.2578947368421054, | |
| "grad_norm": 2.5861078342959614, | |
| "learning_rate": 7.453025923830296e-05, | |
| "loss": 1.3361, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.263157894736842, | |
| "grad_norm": 3.398411462187095, | |
| "learning_rate": 7.445583834532546e-05, | |
| "loss": 1.3309, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.268421052631579, | |
| "grad_norm": 2.4423289107004664, | |
| "learning_rate": 7.438095226290685e-05, | |
| "loss": 1.337, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.2736842105263158, | |
| "grad_norm": 2.4746167081096324, | |
| "learning_rate": 7.430560200208669e-05, | |
| "loss": 1.3105, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.2789473684210526, | |
| "grad_norm": 2.7651610722472353, | |
| "learning_rate": 7.42297885801714e-05, | |
| "loss": 1.3243, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.2842105263157895, | |
| "grad_norm": 1.6511969920414749, | |
| "learning_rate": 7.415351302072056e-05, | |
| "loss": 1.3105, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.2894736842105263, | |
| "grad_norm": 3.104346761016083, | |
| "learning_rate": 7.407677635353308e-05, | |
| "loss": 1.3298, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.2947368421052632, | |
| "grad_norm": 2.3550214994148235, | |
| "learning_rate": 7.399957961463332e-05, | |
| "loss": 1.3649, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 2.266967394498034, | |
| "learning_rate": 7.392192384625704e-05, | |
| "loss": 1.3363, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.305263157894737, | |
| "grad_norm": 3.1011314193494104, | |
| "learning_rate": 7.384381009683742e-05, | |
| "loss": 1.3252, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.3105263157894738, | |
| "grad_norm": 2.133334459450928, | |
| "learning_rate": 7.376523942099084e-05, | |
| "loss": 1.3307, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.3157894736842106, | |
| "grad_norm": 4.326087201648726, | |
| "learning_rate": 7.368621287950264e-05, | |
| "loss": 1.4045, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.3210526315789473, | |
| "grad_norm": 22.674882226571523, | |
| "learning_rate": 7.360673153931285e-05, | |
| "loss": 1.3348, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.3263157894736843, | |
| "grad_norm": 3.064941459260646, | |
| "learning_rate": 7.352679647350172e-05, | |
| "loss": 1.3425, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.331578947368421, | |
| "grad_norm": 2.6376251183297055, | |
| "learning_rate": 7.344640876127529e-05, | |
| "loss": 1.3389, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.3368421052631578, | |
| "grad_norm": 3.518986507081023, | |
| "learning_rate": 7.33655694879508e-05, | |
| "loss": 1.325, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.3421052631578947, | |
| "grad_norm": 6.772119772438152, | |
| "learning_rate": 7.328427974494201e-05, | |
| "loss": 1.3435, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.3473684210526315, | |
| "grad_norm": 3.1439267189525966, | |
| "learning_rate": 7.32025406297445e-05, | |
| "loss": 1.3482, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.3526315789473684, | |
| "grad_norm": 2.9757174788747442, | |
| "learning_rate": 7.312035324592081e-05, | |
| "loss": 1.4253, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.3578947368421053, | |
| "grad_norm": 19.234550469937634, | |
| "learning_rate": 7.303771870308561e-05, | |
| "loss": 1.5748, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.3631578947368421, | |
| "grad_norm": 166.65509126340316, | |
| "learning_rate": 7.295463811689069e-05, | |
| "loss": 7.3386, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.368421052631579, | |
| "grad_norm": 37.296808447795385, | |
| "learning_rate": 7.28711126090098e-05, | |
| "loss": 7.6292, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.3736842105263158, | |
| "grad_norm": 206.77559967714524, | |
| "learning_rate": 7.278714330712372e-05, | |
| "loss": 5.9669, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.3789473684210527, | |
| "grad_norm": 31.10081965111831, | |
| "learning_rate": 7.27027313449048e-05, | |
| "loss": 1.9804, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.3842105263157896, | |
| "grad_norm": 273.4851676662734, | |
| "learning_rate": 7.261787786200179e-05, | |
| "loss": 4.0434, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.3894736842105262, | |
| "grad_norm": 21.94907796550795, | |
| "learning_rate": 7.253258400402448e-05, | |
| "loss": 2.3785, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.3947368421052633, | |
| "grad_norm": 87.34643583499242, | |
| "learning_rate": 7.24468509225281e-05, | |
| "loss": 3.2623, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 14.043694608373665, | |
| "learning_rate": 7.236067977499791e-05, | |
| "loss": 2.0359, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.4052631578947368, | |
| "grad_norm": 204.7560211103692, | |
| "learning_rate": 7.227407172483348e-05, | |
| "loss": 2.6066, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.4105263157894736, | |
| "grad_norm": 8.50429112485772, | |
| "learning_rate": 7.218702794133304e-05, | |
| "loss": 1.8554, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.4157894736842105, | |
| "grad_norm": 7.0947931831701805, | |
| "learning_rate": 7.209954959967765e-05, | |
| "loss": 1.7393, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.4210526315789473, | |
| "grad_norm": 3.181643344667253, | |
| "learning_rate": 7.201163788091536e-05, | |
| "loss": 1.5682, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.4263157894736842, | |
| "grad_norm": 2.0126327538765865, | |
| "learning_rate": 7.192329397194529e-05, | |
| "loss": 1.4786, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.431578947368421, | |
| "grad_norm": 2.7578093818109175, | |
| "learning_rate": 7.183451906550155e-05, | |
| "loss": 1.4642, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.436842105263158, | |
| "grad_norm": 1.9145504095924126, | |
| "learning_rate": 7.174531436013712e-05, | |
| "loss": 1.4291, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.4421052631578948, | |
| "grad_norm": 2.830916672053224, | |
| "learning_rate": 7.165568106020779e-05, | |
| "loss": 1.4538, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.4473684210526316, | |
| "grad_norm": 3.3750230003094464, | |
| "learning_rate": 7.156562037585576e-05, | |
| "loss": 1.4218, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.4526315789473685, | |
| "grad_norm": 1.886048219133163, | |
| "learning_rate": 7.147513352299336e-05, | |
| "loss": 1.4005, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.4578947368421051, | |
| "grad_norm": 4.612254413421206, | |
| "learning_rate": 7.138422172328671e-05, | |
| "loss": 1.4112, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.4631578947368422, | |
| "grad_norm": 3.475637100160211, | |
| "learning_rate": 7.129288620413907e-05, | |
| "loss": 1.388, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.4684210526315788, | |
| "grad_norm": 3.762823770210365, | |
| "learning_rate": 7.120112819867437e-05, | |
| "loss": 1.3941, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.4736842105263157, | |
| "grad_norm": 2.965363344704181, | |
| "learning_rate": 7.110894894572056e-05, | |
| "loss": 1.3815, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.4789473684210526, | |
| "grad_norm": 3.003148787485473, | |
| "learning_rate": 7.101634968979287e-05, | |
| "loss": 1.3805, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.4842105263157894, | |
| "grad_norm": 1.9778398666652557, | |
| "learning_rate": 7.092333168107697e-05, | |
| "loss": 1.3752, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.4894736842105263, | |
| "grad_norm": 3.5530639325816114, | |
| "learning_rate": 7.082989617541217e-05, | |
| "loss": 1.3919, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.4947368421052631, | |
| "grad_norm": 2.4964012979013144, | |
| "learning_rate": 7.073604443427437e-05, | |
| "loss": 1.3752, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 3.2586608061353224, | |
| "learning_rate": 7.064177772475912e-05, | |
| "loss": 1.3537, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.5052631578947369, | |
| "grad_norm": 2.7382891584432576, | |
| "learning_rate": 7.054709731956449e-05, | |
| "loss": 1.3548, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.5105263157894737, | |
| "grad_norm": 2.7983043107255714, | |
| "learning_rate": 7.045200449697379e-05, | |
| "loss": 1.355, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.5157894736842106, | |
| "grad_norm": 2.1575170098628207, | |
| "learning_rate": 7.035650054083847e-05, | |
| "loss": 1.3666, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.5210526315789474, | |
| "grad_norm": 2.280552356804481, | |
| "learning_rate": 7.026058674056067e-05, | |
| "loss": 1.3729, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.526315789473684, | |
| "grad_norm": 1.8204200442034197, | |
| "learning_rate": 7.016426439107586e-05, | |
| "loss": 1.3285, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.5315789473684212, | |
| "grad_norm": 2.2692718684429805, | |
| "learning_rate": 7.006753479283535e-05, | |
| "loss": 1.3432, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.5368421052631578, | |
| "grad_norm": 1.608298273784726, | |
| "learning_rate": 6.99703992517887e-05, | |
| "loss": 1.3457, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.5421052631578949, | |
| "grad_norm": 2.291066728931036, | |
| "learning_rate": 6.987285907936617e-05, | |
| "loss": 1.3489, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.5473684210526315, | |
| "grad_norm": 1.799873956016166, | |
| "learning_rate": 6.977491559246091e-05, | |
| "loss": 1.3538, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.5526315789473686, | |
| "grad_norm": 2.1836156231488144, | |
| "learning_rate": 6.967657011341126e-05, | |
| "loss": 1.3393, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.5578947368421052, | |
| "grad_norm": 1.656082168753184, | |
| "learning_rate": 6.957782396998289e-05, | |
| "loss": 1.3487, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.563157894736842, | |
| "grad_norm": 2.237518228348859, | |
| "learning_rate": 6.94786784953508e-05, | |
| "loss": 1.3431, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.568421052631579, | |
| "grad_norm": 1.8074440576933803, | |
| "learning_rate": 6.937913502808142e-05, | |
| "loss": 1.3338, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.5736842105263158, | |
| "grad_norm": 2.1852538797514134, | |
| "learning_rate": 6.927919491211447e-05, | |
| "loss": 1.3408, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.5789473684210527, | |
| "grad_norm": 1.686931533274294, | |
| "learning_rate": 6.917885949674483e-05, | |
| "loss": 1.337, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.5842105263157895, | |
| "grad_norm": 2.484073100527864, | |
| "learning_rate": 6.907813013660437e-05, | |
| "loss": 1.3315, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.5894736842105264, | |
| "grad_norm": 1.9730996981374016, | |
| "learning_rate": 6.897700819164357e-05, | |
| "loss": 1.3383, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.594736842105263, | |
| "grad_norm": 1.4953094506502813, | |
| "learning_rate": 6.887549502711323e-05, | |
| "loss": 1.3316, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 1.7333934091879961, | |
| "learning_rate": 6.877359201354606e-05, | |
| "loss": 1.3338, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.6052631578947367, | |
| "grad_norm": 1.701314271187227, | |
| "learning_rate": 6.867130052673806e-05, | |
| "loss": 1.3233, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.6105263157894738, | |
| "grad_norm": 2.5170048810500365, | |
| "learning_rate": 6.856862194773008e-05, | |
| "loss": 1.3418, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.6157894736842104, | |
| "grad_norm": 1.1423723633356422, | |
| "learning_rate": 6.846555766278909e-05, | |
| "loss": 1.3456, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.6210526315789475, | |
| "grad_norm": 2.1226546892123688, | |
| "learning_rate": 6.83621090633895e-05, | |
| "loss": 1.3199, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.6263157894736842, | |
| "grad_norm": 2.120207603951501, | |
| "learning_rate": 6.825827754619434e-05, | |
| "loss": 1.3252, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.631578947368421, | |
| "grad_norm": 1.3158750566710444, | |
| "learning_rate": 6.815406451303647e-05, | |
| "loss": 1.3213, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.6368421052631579, | |
| "grad_norm": 2.597320776495221, | |
| "learning_rate": 6.804947137089955e-05, | |
| "loss": 1.3112, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.6421052631578947, | |
| "grad_norm": 1.6685217693160599, | |
| "learning_rate": 6.794449953189916e-05, | |
| "loss": 1.3074, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.6473684210526316, | |
| "grad_norm": 2.5188932447525283, | |
| "learning_rate": 6.783915041326364e-05, | |
| "loss": 1.331, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.6526315789473685, | |
| "grad_norm": 2.071056305940592, | |
| "learning_rate": 6.773342543731503e-05, | |
| "loss": 1.3173, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.6578947368421053, | |
| "grad_norm": 2.427656153267591, | |
| "learning_rate": 6.762732603144978e-05, | |
| "loss": 1.3329, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.663157894736842, | |
| "grad_norm": 1.6471906450412725, | |
| "learning_rate": 6.75208536281196e-05, | |
| "loss": 1.311, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.668421052631579, | |
| "grad_norm": 2.3066742827555022, | |
| "learning_rate": 6.7414009664812e-05, | |
| "loss": 1.3349, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.6736842105263157, | |
| "grad_norm": 1.8458843126503317, | |
| "learning_rate": 6.730679558403093e-05, | |
| "loss": 1.3236, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.6789473684210527, | |
| "grad_norm": 2.1861813200392843, | |
| "learning_rate": 6.719921283327736e-05, | |
| "loss": 1.3268, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.6842105263157894, | |
| "grad_norm": 2.1870673388161124, | |
| "learning_rate": 6.709126286502965e-05, | |
| "loss": 1.3019, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.6894736842105265, | |
| "grad_norm": 1.4274808199921123, | |
| "learning_rate": 6.698294713672395e-05, | |
| "loss": 1.3255, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.694736842105263, | |
| "grad_norm": 1.5694017468203492, | |
| "learning_rate": 6.687426711073462e-05, | |
| "loss": 1.3048, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 1.1078521144544478, | |
| "learning_rate": 6.676522425435433e-05, | |
| "loss": 1.3087, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.7052631578947368, | |
| "grad_norm": 2.4538742138976386, | |
| "learning_rate": 6.665582003977441e-05, | |
| "loss": 1.3244, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.7105263157894737, | |
| "grad_norm": 1.7323261915367696, | |
| "learning_rate": 6.654605594406486e-05, | |
| "loss": 1.3093, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.7157894736842105, | |
| "grad_norm": 1.7174315153551183, | |
| "learning_rate": 6.643593344915445e-05, | |
| "loss": 1.3141, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.7210526315789474, | |
| "grad_norm": 1.4498322250506395, | |
| "learning_rate": 6.632545404181074e-05, | |
| "loss": 1.3251, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.7263157894736842, | |
| "grad_norm": 2.978144373846546, | |
| "learning_rate": 6.62146192136199e-05, | |
| "loss": 1.3117, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.731578947368421, | |
| "grad_norm": 1.8288925620523002, | |
| "learning_rate": 6.610343046096674e-05, | |
| "loss": 1.311, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.736842105263158, | |
| "grad_norm": 2.8409045314260255, | |
| "learning_rate": 6.59918892850144e-05, | |
| "loss": 1.3263, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.7421052631578946, | |
| "grad_norm": 1.9806940703831386, | |
| "learning_rate": 6.587999719168401e-05, | |
| "loss": 1.3179, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.7473684210526317, | |
| "grad_norm": 2.231645147378468, | |
| "learning_rate": 6.576775569163458e-05, | |
| "loss": 1.3216, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.7526315789473683, | |
| "grad_norm": 2.242436351469589, | |
| "learning_rate": 6.565516630024236e-05, | |
| "loss": 1.3263, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.7578947368421054, | |
| "grad_norm": 1.418447898215289, | |
| "learning_rate": 6.554223053758055e-05, | |
| "loss": 1.317, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.763157894736842, | |
| "grad_norm": 2.1049377231565036, | |
| "learning_rate": 6.542894992839873e-05, | |
| "loss": 1.3278, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.768421052631579, | |
| "grad_norm": 1.9649271286389844, | |
| "learning_rate": 6.531532600210222e-05, | |
| "loss": 1.3309, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.7736842105263158, | |
| "grad_norm": 1.3002559482544591, | |
| "learning_rate": 6.520136029273151e-05, | |
| "loss": 1.3003, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.7789473684210526, | |
| "grad_norm": 2.4684068562448136, | |
| "learning_rate": 6.508705433894149e-05, | |
| "loss": 1.32, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.7842105263157895, | |
| "grad_norm": 1.5743465960197915, | |
| "learning_rate": 6.497240968398072e-05, | |
| "loss": 1.3006, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.7894736842105263, | |
| "grad_norm": 2.59770911193071, | |
| "learning_rate": 6.48574278756706e-05, | |
| "loss": 1.3222, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.7947368421052632, | |
| "grad_norm": 1.7842505149097647, | |
| "learning_rate": 6.474211046638438e-05, | |
| "loss": 1.3161, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 2.7074992552378805, | |
| "learning_rate": 6.462645901302633e-05, | |
| "loss": 1.3281, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.805263157894737, | |
| "grad_norm": 1.8215475542278567, | |
| "learning_rate": 6.451047507701065e-05, | |
| "loss": 1.3282, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.8105263157894735, | |
| "grad_norm": 3.304881983512875, | |
| "learning_rate": 6.439416022424036e-05, | |
| "loss": 1.3391, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.8157894736842106, | |
| "grad_norm": 3.2476105816930954, | |
| "learning_rate": 6.427751602508628e-05, | |
| "loss": 1.3348, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.8210526315789473, | |
| "grad_norm": 1.5869549786160873, | |
| "learning_rate": 6.416054405436564e-05, | |
| "loss": 1.3201, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.8263157894736843, | |
| "grad_norm": 2.2683887128326723, | |
| "learning_rate": 6.404324589132101e-05, | |
| "loss": 1.3204, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.831578947368421, | |
| "grad_norm": 1.9620950054062172, | |
| "learning_rate": 6.392562311959886e-05, | |
| "loss": 1.3158, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.836842105263158, | |
| "grad_norm": 1.8047773439892525, | |
| "learning_rate": 6.380767732722821e-05, | |
| "loss": 1.3181, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.8421052631578947, | |
| "grad_norm": 1.911531036771628, | |
| "learning_rate": 6.368941010659921e-05, | |
| "loss": 1.3292, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.8473684210526315, | |
| "grad_norm": 1.636720765605733, | |
| "learning_rate": 6.35708230544416e-05, | |
| "loss": 1.3091, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.8526315789473684, | |
| "grad_norm": 1.424304904246396, | |
| "learning_rate": 6.34519177718032e-05, | |
| "loss": 1.3207, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.8578947368421053, | |
| "grad_norm": 1.3525184453889394, | |
| "learning_rate": 6.333269586402827e-05, | |
| "loss": 1.3125, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.8631578947368421, | |
| "grad_norm": 1.7281390731184902, | |
| "learning_rate": 6.321315894073581e-05, | |
| "loss": 1.3231, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.868421052631579, | |
| "grad_norm": 1.1089366431889842, | |
| "learning_rate": 6.309330861579786e-05, | |
| "loss": 1.3238, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.8736842105263158, | |
| "grad_norm": 1.876591117688095, | |
| "learning_rate": 6.297314650731775e-05, | |
| "loss": 1.3118, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.8789473684210525, | |
| "grad_norm": 1.61640922760774, | |
| "learning_rate": 6.285267423760817e-05, | |
| "loss": 1.3263, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.8842105263157896, | |
| "grad_norm": 1.4451990798983758, | |
| "learning_rate": 6.273189343316929e-05, | |
| "loss": 1.325, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.8894736842105262, | |
| "grad_norm": 1.3409307869705591, | |
| "learning_rate": 6.261080572466688e-05, | |
| "loss": 1.3057, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.8947368421052633, | |
| "grad_norm": 1.6052273256057499, | |
| "learning_rate": 6.248941274691017e-05, | |
| "loss": 1.3252, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 2.366978019871103, | |
| "learning_rate": 6.236771613882987e-05, | |
| "loss": 1.3179, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.905263157894737, | |
| "grad_norm": 1.1868922713453152, | |
| "learning_rate": 6.224571754345602e-05, | |
| "loss": 1.3082, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.9105263157894736, | |
| "grad_norm": 2.2556197419222612, | |
| "learning_rate": 6.21234186078958e-05, | |
| "loss": 1.3115, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.9157894736842105, | |
| "grad_norm": 1.7410078285379156, | |
| "learning_rate": 6.200082098331126e-05, | |
| "loss": 1.3281, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.9210526315789473, | |
| "grad_norm": 1.7950505417159182, | |
| "learning_rate": 6.18779263248971e-05, | |
| "loss": 1.3162, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.9263157894736842, | |
| "grad_norm": 1.8544654429983962, | |
| "learning_rate": 6.175473629185822e-05, | |
| "loss": 1.3205, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.931578947368421, | |
| "grad_norm": 1.7372962100479836, | |
| "learning_rate": 6.163125254738751e-05, | |
| "loss": 1.3065, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.936842105263158, | |
| "grad_norm": 2.242141298655648, | |
| "learning_rate": 6.150747675864314e-05, | |
| "loss": 1.2985, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.9421052631578948, | |
| "grad_norm": 1.481088212600443, | |
| "learning_rate": 6.138341059672622e-05, | |
| "loss": 1.3136, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.9473684210526314, | |
| "grad_norm": 2.1356181680202253, | |
| "learning_rate": 6.125905573665824e-05, | |
| "loss": 1.3282, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.9526315789473685, | |
| "grad_norm": 1.6259642009051207, | |
| "learning_rate": 6.113441385735836e-05, | |
| "loss": 1.3131, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.9578947368421051, | |
| "grad_norm": 1.973896595209029, | |
| "learning_rate": 6.100948664162081e-05, | |
| "loss": 1.3182, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.9631578947368422, | |
| "grad_norm": 1.616074252415091, | |
| "learning_rate": 6.088427577609219e-05, | |
| "loss": 1.3037, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.9684210526315788, | |
| "grad_norm": 1.777657189903051, | |
| "learning_rate": 6.075878295124861e-05, | |
| "loss": 1.3096, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.973684210526316, | |
| "grad_norm": 1.5325372367258376, | |
| "learning_rate": 6.063300986137297e-05, | |
| "loss": 1.3092, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.9789473684210526, | |
| "grad_norm": 1.7425893453777117, | |
| "learning_rate": 6.0506958204531996e-05, | |
| "loss": 1.3094, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.9842105263157894, | |
| "grad_norm": 1.2678177296707205, | |
| "learning_rate": 6.0380629682553395e-05, | |
| "loss": 1.2995, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.9894736842105263, | |
| "grad_norm": 2.083473793091378, | |
| "learning_rate": 6.025402600100283e-05, | |
| "loss": 1.3133, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.9947368421052631, | |
| "grad_norm": 1.5761354608098717, | |
| "learning_rate": 6.012714886916088e-05, | |
| "loss": 1.3232, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.6759654932294628, | |
| "learning_rate": 6.000000000000001e-05, | |
| "loss": 1.295, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.0052631578947366, | |
| "grad_norm": 1.6566803747791274, | |
| "learning_rate": 5.987258111016139e-05, | |
| "loss": 1.269, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.0105263157894737, | |
| "grad_norm": 1.7773404022104615, | |
| "learning_rate": 5.974489391993182e-05, | |
| "loss": 1.2756, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.0157894736842104, | |
| "grad_norm": 1.7734820944361986, | |
| "learning_rate": 5.9616940153220336e-05, | |
| "loss": 1.3024, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.0210526315789474, | |
| "grad_norm": 1.3832861885005663, | |
| "learning_rate": 5.948872153753509e-05, | |
| "loss": 1.292, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.026315789473684, | |
| "grad_norm": 1.9267936406726134, | |
| "learning_rate": 5.936023980395997e-05, | |
| "loss": 1.2974, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.031578947368421, | |
| "grad_norm": 1.1634482135657338, | |
| "learning_rate": 5.923149668713118e-05, | |
| "loss": 1.2864, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.036842105263158, | |
| "grad_norm": 1.4709242560357587, | |
| "learning_rate": 5.9102493925213946e-05, | |
| "loss": 1.2719, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.042105263157895, | |
| "grad_norm": 1.3716802116661737, | |
| "learning_rate": 5.8973233259878914e-05, | |
| "loss": 1.2688, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.0473684210526315, | |
| "grad_norm": 1.8540565451199285, | |
| "learning_rate": 5.8843716436278696e-05, | |
| "loss": 1.292, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.0526315789473686, | |
| "grad_norm": 1.6569753347566705, | |
| "learning_rate": 5.871394520302432e-05, | |
| "loss": 1.2923, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.057894736842105, | |
| "grad_norm": 1.3344056097550805, | |
| "learning_rate": 5.85839213121616e-05, | |
| "loss": 1.2783, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.0631578947368423, | |
| "grad_norm": 1.0756154226095422, | |
| "learning_rate": 5.845364651914752e-05, | |
| "loss": 1.2823, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.068421052631579, | |
| "grad_norm": 1.5938788684018976, | |
| "learning_rate": 5.832312258282645e-05, | |
| "loss": 1.2872, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.0736842105263156, | |
| "grad_norm": 1.9960858248177353, | |
| "learning_rate": 5.8192351265406466e-05, | |
| "loss": 1.2819, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.0789473684210527, | |
| "grad_norm": 1.500794950504469, | |
| "learning_rate": 5.806133433243558e-05, | |
| "loss": 1.3018, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.0842105263157893, | |
| "grad_norm": 1.9776756621227738, | |
| "learning_rate": 5.793007355277783e-05, | |
| "loss": 1.2947, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.0894736842105264, | |
| "grad_norm": 1.5136499333830533, | |
| "learning_rate": 5.7798570698589465e-05, | |
| "loss": 1.2847, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.094736842105263, | |
| "grad_norm": 2.7888675821510467, | |
| "learning_rate": 5.7666827545294965e-05, | |
| "loss": 1.2803, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 1.997214443213332, | |
| "learning_rate": 5.75348458715631e-05, | |
| "loss": 1.2889, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.1052631578947367, | |
| "grad_norm": 2.636705031350177, | |
| "learning_rate": 5.740262745928293e-05, | |
| "loss": 1.2964, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.110526315789474, | |
| "grad_norm": 2.1100662062402775, | |
| "learning_rate": 5.727017409353971e-05, | |
| "loss": 1.2878, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.1157894736842104, | |
| "grad_norm": 1.9907403756995032, | |
| "learning_rate": 5.713748756259085e-05, | |
| "loss": 1.2942, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.1210526315789475, | |
| "grad_norm": 2.0618250265894433, | |
| "learning_rate": 5.700456965784167e-05, | |
| "loss": 1.2857, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.126315789473684, | |
| "grad_norm": 0.9319289242731835, | |
| "learning_rate": 5.687142217382129e-05, | |
| "loss": 1.2708, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.1315789473684212, | |
| "grad_norm": 2.992807432876805, | |
| "learning_rate": 5.673804690815845e-05, | |
| "loss": 1.309, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.136842105263158, | |
| "grad_norm": 1.864224068302743, | |
| "learning_rate": 5.660444566155709e-05, | |
| "loss": 1.2854, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.1421052631578945, | |
| "grad_norm": 3.4214971672572596, | |
| "learning_rate": 5.647062023777221e-05, | |
| "loss": 1.2927, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.1473684210526316, | |
| "grad_norm": 2.508109225516717, | |
| "learning_rate": 5.633657244358535e-05, | |
| "loss": 1.2829, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.1526315789473682, | |
| "grad_norm": 3.929215425642367, | |
| "learning_rate": 5.6202304088780335e-05, | |
| "loss": 1.2946, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.1578947368421053, | |
| "grad_norm": 3.798085324745515, | |
| "learning_rate": 5.606781698611879e-05, | |
| "loss": 1.3013, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.163157894736842, | |
| "grad_norm": 2.0206510011506222, | |
| "learning_rate": 5.593311295131562e-05, | |
| "loss": 1.2917, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.168421052631579, | |
| "grad_norm": 3.092904855930784, | |
| "learning_rate": 5.579819380301458e-05, | |
| "loss": 1.2795, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.1736842105263157, | |
| "grad_norm": 2.2386119816911623, | |
| "learning_rate": 5.5663061362763665e-05, | |
| "loss": 1.2964, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.1789473684210527, | |
| "grad_norm": 2.8845075274191223, | |
| "learning_rate": 5.552771745499051e-05, | |
| "loss": 1.29, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.1842105263157894, | |
| "grad_norm": 2.355680412049654, | |
| "learning_rate": 5.5392163906977835e-05, | |
| "loss": 1.2802, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.1894736842105265, | |
| "grad_norm": 2.577365756174829, | |
| "learning_rate": 5.525640254883865e-05, | |
| "loss": 1.2894, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.194736842105263, | |
| "grad_norm": 2.0046660703267576, | |
| "learning_rate": 5.512043521349166e-05, | |
| "loss": 1.2873, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 2.576417069207704, | |
| "learning_rate": 5.4984263736636494e-05, | |
| "loss": 1.2759, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.205263157894737, | |
| "grad_norm": 1.9511016222282593, | |
| "learning_rate": 5.4847889956728834e-05, | |
| "loss": 1.298, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.2105263157894735, | |
| "grad_norm": 2.6670388356828285, | |
| "learning_rate": 5.471131571495574e-05, | |
| "loss": 1.2951, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.2157894736842105, | |
| "grad_norm": 2.126207826369636, | |
| "learning_rate": 5.457454285521064e-05, | |
| "loss": 1.2812, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.221052631578947, | |
| "grad_norm": 2.400820316806507, | |
| "learning_rate": 5.4437573224068595e-05, | |
| "loss": 1.2948, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.2263157894736842, | |
| "grad_norm": 1.7795504525185426, | |
| "learning_rate": 5.4300408670761204e-05, | |
| "loss": 1.2959, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.231578947368421, | |
| "grad_norm": 2.8829533810849663, | |
| "learning_rate": 5.416305104715175e-05, | |
| "loss": 1.3074, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.236842105263158, | |
| "grad_norm": 2.159110693359624, | |
| "learning_rate": 5.4025502207710184e-05, | |
| "loss": 1.2797, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.2421052631578946, | |
| "grad_norm": 3.0783424431722666, | |
| "learning_rate": 5.388776400948803e-05, | |
| "loss": 1.2864, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.2473684210526317, | |
| "grad_norm": 2.640451536165918, | |
| "learning_rate": 5.3749838312093364e-05, | |
| "loss": 1.2987, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.2526315789473683, | |
| "grad_norm": 2.413356511304884, | |
| "learning_rate": 5.361172697766573e-05, | |
| "loss": 1.2775, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.2578947368421054, | |
| "grad_norm": 2.334518355071201, | |
| "learning_rate": 5.3473431870850904e-05, | |
| "loss": 1.275, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.263157894736842, | |
| "grad_norm": 2.4122426514527984, | |
| "learning_rate": 5.333495485877583e-05, | |
| "loss": 1.2961, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.268421052631579, | |
| "grad_norm": 2.2362176845592208, | |
| "learning_rate": 5.3196297811023316e-05, | |
| "loss": 1.2937, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.2736842105263158, | |
| "grad_norm": 2.293429615790018, | |
| "learning_rate": 5.305746259960689e-05, | |
| "loss": 1.2852, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.2789473684210524, | |
| "grad_norm": 1.786254087945556, | |
| "learning_rate": 5.291845109894544e-05, | |
| "loss": 1.2799, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.2842105263157895, | |
| "grad_norm": 2.6945565744818407, | |
| "learning_rate": 5.277926518583793e-05, | |
| "loss": 1.2921, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.2894736842105265, | |
| "grad_norm": 2.2194659113523962, | |
| "learning_rate": 5.263990673943811e-05, | |
| "loss": 1.3046, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.294736842105263, | |
| "grad_norm": 2.547332541805207, | |
| "learning_rate": 5.250037764122907e-05, | |
| "loss": 1.2842, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 2.2417529148390325, | |
| "learning_rate": 5.23606797749979e-05, | |
| "loss": 1.2737, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.305263157894737, | |
| "grad_norm": 2.297738957213229, | |
| "learning_rate": 5.2220815026810234e-05, | |
| "loss": 1.2964, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.3105263157894735, | |
| "grad_norm": 2.0161964482231416, | |
| "learning_rate": 5.208078528498476e-05, | |
| "loss": 1.2734, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.3157894736842106, | |
| "grad_norm": 2.5589773064425896, | |
| "learning_rate": 5.194059244006779e-05, | |
| "loss": 1.3239, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.3210526315789473, | |
| "grad_norm": 2.5534996515172246, | |
| "learning_rate": 5.180023838480765e-05, | |
| "loss": 1.2839, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.3263157894736843, | |
| "grad_norm": 1.6213064440253335, | |
| "learning_rate": 5.165972501412921e-05, | |
| "loss": 1.2804, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.331578947368421, | |
| "grad_norm": 1.3934815609685396, | |
| "learning_rate": 5.151905422510825e-05, | |
| "loss": 1.2733, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.336842105263158, | |
| "grad_norm": 2.6675021957708447, | |
| "learning_rate": 5.137822791694585e-05, | |
| "loss": 1.2847, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.3421052631578947, | |
| "grad_norm": 2.0956189405862027, | |
| "learning_rate": 5.123724799094279e-05, | |
| "loss": 1.2705, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.3473684210526318, | |
| "grad_norm": 2.419629528561346, | |
| "learning_rate": 5.109611635047379e-05, | |
| "loss": 1.2879, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.3526315789473684, | |
| "grad_norm": 2.4104356876557755, | |
| "learning_rate": 5.095483490096194e-05, | |
| "loss": 1.2935, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.3578947368421055, | |
| "grad_norm": 1.8777710470149278, | |
| "learning_rate": 5.081340554985287e-05, | |
| "loss": 1.2775, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.363157894736842, | |
| "grad_norm": 1.487851978844643, | |
| "learning_rate": 5.067183020658905e-05, | |
| "loss": 1.2761, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.3684210526315788, | |
| "grad_norm": 2.4549351462483586, | |
| "learning_rate": 5.053011078258397e-05, | |
| "loss": 1.2692, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.373684210526316, | |
| "grad_norm": 1.8656568337670418, | |
| "learning_rate": 5.03882491911964e-05, | |
| "loss": 1.2911, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.3789473684210525, | |
| "grad_norm": 2.7641450741813265, | |
| "learning_rate": 5.024624734770446e-05, | |
| "loss": 1.2735, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.3842105263157896, | |
| "grad_norm": 2.722572050222999, | |
| "learning_rate": 5.010410716927988e-05, | |
| "loss": 1.2737, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.389473684210526, | |
| "grad_norm": 1.428940654329024, | |
| "learning_rate": 4.9961830574962e-05, | |
| "loss": 1.2888, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.3947368421052633, | |
| "grad_norm": 1.5780962302368826, | |
| "learning_rate": 4.981941948563197e-05, | |
| "loss": 1.2812, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 2.0344826746474283, | |
| "learning_rate": 4.967687582398671e-05, | |
| "loss": 1.2864, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.405263157894737, | |
| "grad_norm": 1.4319688356833826, | |
| "learning_rate": 4.953420151451304e-05, | |
| "loss": 1.2834, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.4105263157894736, | |
| "grad_norm": 2.601310717014097, | |
| "learning_rate": 4.939139848346164e-05, | |
| "loss": 1.2823, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.4157894736842107, | |
| "grad_norm": 2.348235653569354, | |
| "learning_rate": 4.924846865882107e-05, | |
| "loss": 1.2846, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.4210526315789473, | |
| "grad_norm": 1.726129892949758, | |
| "learning_rate": 4.9105413970291747e-05, | |
| "loss": 1.3011, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.4263157894736844, | |
| "grad_norm": 1.7111299716712474, | |
| "learning_rate": 4.896223634925984e-05, | |
| "loss": 1.3116, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.431578947368421, | |
| "grad_norm": 1.803266671516624, | |
| "learning_rate": 4.8818937728771294e-05, | |
| "loss": 1.272, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.4368421052631577, | |
| "grad_norm": 1.3759594624212466, | |
| "learning_rate": 4.867552004350564e-05, | |
| "loss": 1.289, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.442105263157895, | |
| "grad_norm": 2.3296183382561253, | |
| "learning_rate": 4.853198522974988e-05, | |
| "loss": 1.2911, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.4473684210526314, | |
| "grad_norm": 1.9742731162641471, | |
| "learning_rate": 4.8388335225372416e-05, | |
| "loss": 1.2656, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.4526315789473685, | |
| "grad_norm": 1.6842871202377092, | |
| "learning_rate": 4.8244571969796817e-05, | |
| "loss": 1.2891, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.457894736842105, | |
| "grad_norm": 1.5497120892994825, | |
| "learning_rate": 4.810069740397569e-05, | |
| "loss": 1.2844, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.463157894736842, | |
| "grad_norm": 1.824870478700358, | |
| "learning_rate": 4.795671347036439e-05, | |
| "loss": 1.2902, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.468421052631579, | |
| "grad_norm": 1.3990180325007069, | |
| "learning_rate": 4.781262211289491e-05, | |
| "loss": 1.281, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.473684210526316, | |
| "grad_norm": 2.320520504849803, | |
| "learning_rate": 4.7668425276949546e-05, | |
| "loss": 1.2838, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.4789473684210526, | |
| "grad_norm": 2.1234854667075758, | |
| "learning_rate": 4.7524124909334653e-05, | |
| "loss": 1.2797, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.4842105263157896, | |
| "grad_norm": 1.422390194322296, | |
| "learning_rate": 4.7379722958254394e-05, | |
| "loss": 1.2896, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.4894736842105263, | |
| "grad_norm": 1.3196019422752756, | |
| "learning_rate": 4.7235221373284407e-05, | |
| "loss": 1.2744, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.4947368421052634, | |
| "grad_norm": 1.7461193994213873, | |
| "learning_rate": 4.709062210534547e-05, | |
| "loss": 1.2887, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 1.3331786938242027, | |
| "learning_rate": 4.694592710667723e-05, | |
| "loss": 1.281, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.5052631578947366, | |
| "grad_norm": 2.0104890922006464, | |
| "learning_rate": 4.680113833081173e-05, | |
| "loss": 1.2786, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.5105263157894737, | |
| "grad_norm": 1.92734853764442, | |
| "learning_rate": 4.665625773254716e-05, | |
| "loss": 1.2844, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.515789473684211, | |
| "grad_norm": 1.3368646117339327, | |
| "learning_rate": 4.6511287267921394e-05, | |
| "loss": 1.2944, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.5210526315789474, | |
| "grad_norm": 1.158860061239002, | |
| "learning_rate": 4.636622889418558e-05, | |
| "loss": 1.2728, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.526315789473684, | |
| "grad_norm": 1.7510317771973813, | |
| "learning_rate": 4.622108456977773e-05, | |
| "loss": 1.2752, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.531578947368421, | |
| "grad_norm": 1.3943091910623553, | |
| "learning_rate": 4.60758562542963e-05, | |
| "loss": 1.3005, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.536842105263158, | |
| "grad_norm": 1.8549674666458555, | |
| "learning_rate": 4.593054590847368e-05, | |
| "loss": 1.281, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.542105263157895, | |
| "grad_norm": 1.7188026241177852, | |
| "learning_rate": 4.57851554941498e-05, | |
| "loss": 1.3061, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.5473684210526315, | |
| "grad_norm": 1.2685249463251793, | |
| "learning_rate": 4.563968697424553e-05, | |
| "loss": 1.2822, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.5526315789473686, | |
| "grad_norm": 1.549673702485011, | |
| "learning_rate": 4.549414231273633e-05, | |
| "loss": 1.2958, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.557894736842105, | |
| "grad_norm": 1.0822272735204688, | |
| "learning_rate": 4.534852347462559e-05, | |
| "loss": 1.2829, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.5631578947368423, | |
| "grad_norm": 0.9555789360037702, | |
| "learning_rate": 4.5202832425918166e-05, | |
| "loss": 1.3051, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.568421052631579, | |
| "grad_norm": 1.067488479666183, | |
| "learning_rate": 4.5057071133593853e-05, | |
| "loss": 1.275, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.5736842105263156, | |
| "grad_norm": 1.3214859289777758, | |
| "learning_rate": 4.4911241565580796e-05, | |
| "loss": 1.2887, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.5789473684210527, | |
| "grad_norm": 1.309909912707582, | |
| "learning_rate": 4.476534569072895e-05, | |
| "loss": 1.2933, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.5842105263157897, | |
| "grad_norm": 1.103182457932209, | |
| "learning_rate": 4.4619385478783456e-05, | |
| "loss": 1.2785, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.5894736842105264, | |
| "grad_norm": 1.050641547852913, | |
| "learning_rate": 4.4473362900358065e-05, | |
| "loss": 1.2877, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.594736842105263, | |
| "grad_norm": 1.1472680394377797, | |
| "learning_rate": 4.432727992690857e-05, | |
| "loss": 1.285, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 1.362629738278887, | |
| "learning_rate": 4.418113853070614e-05, | |
| "loss": 1.2774, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.6052631578947367, | |
| "grad_norm": 0.9212638160971107, | |
| "learning_rate": 4.403494068481074e-05, | |
| "loss": 1.2956, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.610526315789474, | |
| "grad_norm": 1.3060601473810125, | |
| "learning_rate": 4.388868836304442e-05, | |
| "loss": 1.2864, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.6157894736842104, | |
| "grad_norm": 0.9964781716303204, | |
| "learning_rate": 4.374238353996472e-05, | |
| "loss": 1.2846, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.6210526315789475, | |
| "grad_norm": 1.0334496502405375, | |
| "learning_rate": 4.3596028190838045e-05, | |
| "loss": 1.2751, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.626315789473684, | |
| "grad_norm": 0.8374244946961393, | |
| "learning_rate": 4.3449624291612895e-05, | |
| "loss": 1.2846, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.6315789473684212, | |
| "grad_norm": 1.271324423070232, | |
| "learning_rate": 4.33031738188933e-05, | |
| "loss": 1.2893, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.636842105263158, | |
| "grad_norm": 1.0290842796861808, | |
| "learning_rate": 4.315667874991205e-05, | |
| "loss": 1.2769, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.6421052631578945, | |
| "grad_norm": 1.6287387587682205, | |
| "learning_rate": 4.3010141062504e-05, | |
| "loss": 1.2808, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.6473684210526316, | |
| "grad_norm": 1.05633292251089, | |
| "learning_rate": 4.286356273507949e-05, | |
| "loss": 1.2752, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.6526315789473687, | |
| "grad_norm": 1.2639851855275497, | |
| "learning_rate": 4.271694574659744e-05, | |
| "loss": 1.2673, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.6578947368421053, | |
| "grad_norm": 1.3942797511952854, | |
| "learning_rate": 4.257029207653881e-05, | |
| "loss": 1.2725, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.663157894736842, | |
| "grad_norm": 0.9618496184388876, | |
| "learning_rate": 4.242360370487976e-05, | |
| "loss": 1.2747, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.668421052631579, | |
| "grad_norm": 1.141131756163478, | |
| "learning_rate": 4.2276882612064936e-05, | |
| "loss": 1.3005, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.6736842105263157, | |
| "grad_norm": 0.9471533928078693, | |
| "learning_rate": 4.213013077898084e-05, | |
| "loss": 1.2726, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.6789473684210527, | |
| "grad_norm": 0.9237490499511763, | |
| "learning_rate": 4.1983350186928894e-05, | |
| "loss": 1.2801, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.6842105263157894, | |
| "grad_norm": 1.1110718726310678, | |
| "learning_rate": 4.183654281759888e-05, | |
| "loss": 1.2674, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.6894736842105265, | |
| "grad_norm": 1.2581508468500637, | |
| "learning_rate": 4.168971065304205e-05, | |
| "loss": 1.2809, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.694736842105263, | |
| "grad_norm": 0.9468779696627782, | |
| "learning_rate": 4.154285567564442e-05, | |
| "loss": 1.2796, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 1.3134333293058689, | |
| "learning_rate": 4.139597986810005e-05, | |
| "loss": 1.2698, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.705263157894737, | |
| "grad_norm": 0.9243484356050305, | |
| "learning_rate": 4.124908521338416e-05, | |
| "loss": 1.2745, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.7105263157894735, | |
| "grad_norm": 0.9728548533723144, | |
| "learning_rate": 4.110217369472649e-05, | |
| "loss": 1.2925, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.7157894736842105, | |
| "grad_norm": 0.7367530477112044, | |
| "learning_rate": 4.095524729558441e-05, | |
| "loss": 1.2677, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.7210526315789476, | |
| "grad_norm": 0.9279276228473495, | |
| "learning_rate": 4.080830799961622e-05, | |
| "loss": 1.2802, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.7263157894736842, | |
| "grad_norm": 1.3931410811444014, | |
| "learning_rate": 4.0661357790654345e-05, | |
| "loss": 1.262, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.731578947368421, | |
| "grad_norm": 0.9883528178224094, | |
| "learning_rate": 4.0514398652678514e-05, | |
| "loss": 1.2964, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.736842105263158, | |
| "grad_norm": 1.3163227077320665, | |
| "learning_rate": 4.0367432569789065e-05, | |
| "loss": 1.2805, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.7421052631578946, | |
| "grad_norm": 0.7397578201219466, | |
| "learning_rate": 4.0220461526180023e-05, | |
| "loss": 1.2773, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.7473684210526317, | |
| "grad_norm": 0.9990315603511356, | |
| "learning_rate": 4.007348750611245e-05, | |
| "loss": 1.292, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.7526315789473683, | |
| "grad_norm": 1.1681928253101022, | |
| "learning_rate": 3.9926512493887555e-05, | |
| "loss": 1.2893, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.7578947368421054, | |
| "grad_norm": 1.1014263843169803, | |
| "learning_rate": 3.977953847381998e-05, | |
| "loss": 1.2715, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.763157894736842, | |
| "grad_norm": 1.1159951506001466, | |
| "learning_rate": 3.963256743021095e-05, | |
| "loss": 1.2785, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.768421052631579, | |
| "grad_norm": 1.2507849560008404, | |
| "learning_rate": 3.9485601347321486e-05, | |
| "loss": 1.2906, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.7736842105263158, | |
| "grad_norm": 0.8936481542314029, | |
| "learning_rate": 3.933864220934566e-05, | |
| "loss": 1.2669, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.7789473684210524, | |
| "grad_norm": 0.9849739951015418, | |
| "learning_rate": 3.919169200038379e-05, | |
| "loss": 1.2771, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.7842105263157895, | |
| "grad_norm": 0.837609493415301, | |
| "learning_rate": 3.904475270441561e-05, | |
| "loss": 1.266, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.7894736842105265, | |
| "grad_norm": 0.7219038051900546, | |
| "learning_rate": 3.889782630527353e-05, | |
| "loss": 1.2726, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.794736842105263, | |
| "grad_norm": 0.7505423536800923, | |
| "learning_rate": 3.875091478661585e-05, | |
| "loss": 1.2703, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.7301064067667716, | |
| "learning_rate": 3.860402013189998e-05, | |
| "loss": 1.2812, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.805263157894737, | |
| "grad_norm": 0.822332511103024, | |
| "learning_rate": 3.845714432435558e-05, | |
| "loss": 1.2718, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.8105263157894735, | |
| "grad_norm": 0.7783007350783687, | |
| "learning_rate": 3.8310289346957965e-05, | |
| "loss": 1.2574, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.8157894736842106, | |
| "grad_norm": 0.957233585531184, | |
| "learning_rate": 3.816345718240113e-05, | |
| "loss": 1.2805, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.8210526315789473, | |
| "grad_norm": 1.1226403614971794, | |
| "learning_rate": 3.8016649813071106e-05, | |
| "loss": 1.2983, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.8263157894736843, | |
| "grad_norm": 1.306496898793406, | |
| "learning_rate": 3.7869869221019177e-05, | |
| "loss": 1.2727, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.831578947368421, | |
| "grad_norm": 0.5971018924485769, | |
| "learning_rate": 3.772311738793507e-05, | |
| "loss": 1.2834, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.836842105263158, | |
| "grad_norm": 0.9138436747802899, | |
| "learning_rate": 3.757639629512026e-05, | |
| "loss": 1.2871, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.8421052631578947, | |
| "grad_norm": 1.2871243761882003, | |
| "learning_rate": 3.74297079234612e-05, | |
| "loss": 1.2797, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.8473684210526313, | |
| "grad_norm": 0.951777404285425, | |
| "learning_rate": 3.7283054253402574e-05, | |
| "loss": 1.2754, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.8526315789473684, | |
| "grad_norm": 0.9411344162055937, | |
| "learning_rate": 3.713643726492053e-05, | |
| "loss": 1.2721, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.8578947368421055, | |
| "grad_norm": 1.317846119765965, | |
| "learning_rate": 3.698985893749599e-05, | |
| "loss": 1.2887, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.863157894736842, | |
| "grad_norm": 0.8089940300375972, | |
| "learning_rate": 3.6843321250087966e-05, | |
| "loss": 1.2848, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.8684210526315788, | |
| "grad_norm": 0.5361830354797774, | |
| "learning_rate": 3.669682618110671e-05, | |
| "loss": 1.2657, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.873684210526316, | |
| "grad_norm": 0.8183317690070797, | |
| "learning_rate": 3.655037570838711e-05, | |
| "loss": 1.2866, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.8789473684210525, | |
| "grad_norm": 1.2353845817797051, | |
| "learning_rate": 3.640397180916197e-05, | |
| "loss": 1.2806, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.8842105263157896, | |
| "grad_norm": 1.275438903457986, | |
| "learning_rate": 3.62576164600353e-05, | |
| "loss": 1.3042, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.889473684210526, | |
| "grad_norm": 0.5384567882124249, | |
| "learning_rate": 3.611131163695561e-05, | |
| "loss": 1.2689, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.8947368421052633, | |
| "grad_norm": 0.7803333541473619, | |
| "learning_rate": 3.5965059315189274e-05, | |
| "loss": 1.2797, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 1.4689702016559818, | |
| "learning_rate": 3.581886146929387e-05, | |
| "loss": 1.2648, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.905263157894737, | |
| "grad_norm": 0.6546922206210783, | |
| "learning_rate": 3.567272007309145e-05, | |
| "loss": 1.279, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.9105263157894736, | |
| "grad_norm": 0.706490220466843, | |
| "learning_rate": 3.552663709964194e-05, | |
| "loss": 1.2735, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.9157894736842103, | |
| "grad_norm": 1.3658529786651115, | |
| "learning_rate": 3.538061452121656e-05, | |
| "loss": 1.2916, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.9210526315789473, | |
| "grad_norm": 0.9489706293231372, | |
| "learning_rate": 3.523465430927106e-05, | |
| "loss": 1.2918, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.9263157894736844, | |
| "grad_norm": 0.8764748345395458, | |
| "learning_rate": 3.50887584344192e-05, | |
| "loss": 1.3015, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.931578947368421, | |
| "grad_norm": 0.5355041821626928, | |
| "learning_rate": 3.494292886640615e-05, | |
| "loss": 1.2751, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 2.9368421052631577, | |
| "grad_norm": 0.9242505060104863, | |
| "learning_rate": 3.479716757408185e-05, | |
| "loss": 1.2819, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.942105263157895, | |
| "grad_norm": 1.2554507748275814, | |
| "learning_rate": 3.465147652537443e-05, | |
| "loss": 1.276, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 2.9473684210526314, | |
| "grad_norm": 0.9095910936651702, | |
| "learning_rate": 3.4505857687263675e-05, | |
| "loss": 1.2753, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.9526315789473685, | |
| "grad_norm": 0.6651860681009616, | |
| "learning_rate": 3.4360313025754476e-05, | |
| "loss": 1.2695, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 2.957894736842105, | |
| "grad_norm": 0.9291042715583687, | |
| "learning_rate": 3.421484450585023e-05, | |
| "loss": 1.2961, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.963157894736842, | |
| "grad_norm": 1.3279167632034623, | |
| "learning_rate": 3.406945409152632e-05, | |
| "loss": 1.2858, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 2.968421052631579, | |
| "grad_norm": 0.5900225146576717, | |
| "learning_rate": 3.392414374570371e-05, | |
| "loss": 1.2786, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.973684210526316, | |
| "grad_norm": 0.8721553828236943, | |
| "learning_rate": 3.377891543022229e-05, | |
| "loss": 1.2712, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.9789473684210526, | |
| "grad_norm": 1.0505950441437681, | |
| "learning_rate": 3.363377110581442e-05, | |
| "loss": 1.2719, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.984210526315789, | |
| "grad_norm": 1.3040980751717048, | |
| "learning_rate": 3.348871273207861e-05, | |
| "loss": 1.2961, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 2.9894736842105263, | |
| "grad_norm": 0.7750168697948207, | |
| "learning_rate": 3.334374226745285e-05, | |
| "loss": 1.287, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.9947368421052634, | |
| "grad_norm": 0.797465388668966, | |
| "learning_rate": 3.319886166918829e-05, | |
| "loss": 1.2798, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 0.7195120795363182, | |
| "learning_rate": 3.305407289332279e-05, | |
| "loss": 1.2516, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 3.0052631578947366, | |
| "grad_norm": 0.9683227012264453, | |
| "learning_rate": 3.290937789465454e-05, | |
| "loss": 1.245, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 3.0105263157894737, | |
| "grad_norm": 1.0758712880323096, | |
| "learning_rate": 3.276477862671562e-05, | |
| "loss": 1.2628, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 3.0157894736842104, | |
| "grad_norm": 0.9883859720793654, | |
| "learning_rate": 3.262027704174561e-05, | |
| "loss": 1.2509, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 3.0210526315789474, | |
| "grad_norm": 0.9615705195781193, | |
| "learning_rate": 3.247587509066535e-05, | |
| "loss": 1.264, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 3.026315789473684, | |
| "grad_norm": 0.7910888117572926, | |
| "learning_rate": 3.2331574723050474e-05, | |
| "loss": 1.2454, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 3.031578947368421, | |
| "grad_norm": 0.6384643157937262, | |
| "learning_rate": 3.218737788710509e-05, | |
| "loss": 1.2538, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 3.036842105263158, | |
| "grad_norm": 0.5368984975332023, | |
| "learning_rate": 3.2043286529635614e-05, | |
| "loss": 1.2587, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 3.042105263157895, | |
| "grad_norm": 0.6163486545813159, | |
| "learning_rate": 3.189930259602433e-05, | |
| "loss": 1.2452, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 3.0473684210526315, | |
| "grad_norm": 0.72931652275654, | |
| "learning_rate": 3.175542803020319e-05, | |
| "loss": 1.2414, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 3.0526315789473686, | |
| "grad_norm": 0.8975216647677429, | |
| "learning_rate": 3.161166477462759e-05, | |
| "loss": 1.2562, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 3.057894736842105, | |
| "grad_norm": 0.6779341882405682, | |
| "learning_rate": 3.146801477025013e-05, | |
| "loss": 1.259, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 3.0631578947368423, | |
| "grad_norm": 0.35876586554623435, | |
| "learning_rate": 3.132447995649438e-05, | |
| "loss": 1.2439, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 3.068421052631579, | |
| "grad_norm": 0.4633889396883284, | |
| "learning_rate": 3.11810622712287e-05, | |
| "loss": 1.2443, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 3.0736842105263156, | |
| "grad_norm": 0.37725222875354836, | |
| "learning_rate": 3.103776365074017e-05, | |
| "loss": 1.244, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 3.0789473684210527, | |
| "grad_norm": 0.48272932389464906, | |
| "learning_rate": 3.089458602970828e-05, | |
| "loss": 1.2509, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 3.0842105263157893, | |
| "grad_norm": 0.5490877544866288, | |
| "learning_rate": 3.075153134117893e-05, | |
| "loss": 1.264, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 3.0894736842105264, | |
| "grad_norm": 0.4207792008385385, | |
| "learning_rate": 3.060860151653837e-05, | |
| "loss": 1.2519, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 3.094736842105263, | |
| "grad_norm": 0.44337087568296857, | |
| "learning_rate": 3.046579848548697e-05, | |
| "loss": 1.2387, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "grad_norm": 0.4345387632778484, | |
| "learning_rate": 3.0323124176013297e-05, | |
| "loss": 1.2471, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 3.1052631578947367, | |
| "grad_norm": 0.34646556904906206, | |
| "learning_rate": 3.0180580514368037e-05, | |
| "loss": 1.2574, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 3.110526315789474, | |
| "grad_norm": 0.5480997618564871, | |
| "learning_rate": 3.0038169425038007e-05, | |
| "loss": 1.2483, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 3.1157894736842104, | |
| "grad_norm": 0.3643135054525602, | |
| "learning_rate": 2.9895892830720137e-05, | |
| "loss": 1.2586, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 3.1210526315789475, | |
| "grad_norm": 0.3565417102254677, | |
| "learning_rate": 2.9753752652295538e-05, | |
| "loss": 1.2391, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 3.126315789473684, | |
| "grad_norm": 0.4591251447075665, | |
| "learning_rate": 2.961175080880362e-05, | |
| "loss": 1.2496, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 3.1315789473684212, | |
| "grad_norm": 0.46367424343953406, | |
| "learning_rate": 2.9469889217416045e-05, | |
| "loss": 1.2466, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 3.136842105263158, | |
| "grad_norm": 0.3997287611132656, | |
| "learning_rate": 2.9328169793410954e-05, | |
| "loss": 1.2458, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 3.1421052631578945, | |
| "grad_norm": 0.39902269987295985, | |
| "learning_rate": 2.918659445014713e-05, | |
| "loss": 1.2415, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 3.1473684210526316, | |
| "grad_norm": 0.3404609072909432, | |
| "learning_rate": 2.9045165099038066e-05, | |
| "loss": 1.2631, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 3.1526315789473682, | |
| "grad_norm": 0.3396481784449944, | |
| "learning_rate": 2.890388364952623e-05, | |
| "loss": 1.2548, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 3.1578947368421053, | |
| "grad_norm": 0.37782629450525207, | |
| "learning_rate": 2.8762752009057232e-05, | |
| "loss": 1.2617, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 3.163157894736842, | |
| "grad_norm": 0.5589592818613579, | |
| "learning_rate": 2.8621772083054157e-05, | |
| "loss": 1.2594, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 3.168421052631579, | |
| "grad_norm": 0.42966453281721634, | |
| "learning_rate": 2.8480945774891764e-05, | |
| "loss": 1.2413, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 3.1736842105263157, | |
| "grad_norm": 0.37959826731834306, | |
| "learning_rate": 2.83402749858708e-05, | |
| "loss": 1.2509, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 3.1789473684210527, | |
| "grad_norm": 0.4661717251353946, | |
| "learning_rate": 2.819976161519236e-05, | |
| "loss": 1.2629, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 3.1842105263157894, | |
| "grad_norm": 0.31707150511990617, | |
| "learning_rate": 2.805940755993223e-05, | |
| "loss": 1.2446, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 3.1894736842105265, | |
| "grad_norm": 0.3596061389333874, | |
| "learning_rate": 2.7919214715015236e-05, | |
| "loss": 1.2487, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 3.194736842105263, | |
| "grad_norm": 0.3125529192407293, | |
| "learning_rate": 2.7779184973189773e-05, | |
| "loss": 1.2575, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 0.39929761965783167, | |
| "learning_rate": 2.7639320225002108e-05, | |
| "loss": 1.2563, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 3.205263157894737, | |
| "grad_norm": 0.357481051645098, | |
| "learning_rate": 2.7499622358770936e-05, | |
| "loss": 1.2399, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 3.2105263157894735, | |
| "grad_norm": 0.3253036562346321, | |
| "learning_rate": 2.7360093260561904e-05, | |
| "loss": 1.2587, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 3.2157894736842105, | |
| "grad_norm": 1.0866422737509416, | |
| "learning_rate": 2.722073481416208e-05, | |
| "loss": 1.253, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 3.221052631578947, | |
| "grad_norm": 0.3704975946750915, | |
| "learning_rate": 2.7081548901054574e-05, | |
| "loss": 1.2449, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 3.2263157894736842, | |
| "grad_norm": 0.39681349064147786, | |
| "learning_rate": 2.6942537400393117e-05, | |
| "loss": 1.2393, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 3.231578947368421, | |
| "grad_norm": 0.38789399072411884, | |
| "learning_rate": 2.680370218897669e-05, | |
| "loss": 1.2476, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 3.236842105263158, | |
| "grad_norm": 0.6056318300360599, | |
| "learning_rate": 2.6665045141224193e-05, | |
| "loss": 1.2498, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 3.2421052631578946, | |
| "grad_norm": 0.5268591378002944, | |
| "learning_rate": 2.6526568129149103e-05, | |
| "loss": 1.2509, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 3.2473684210526317, | |
| "grad_norm": 0.5275312902783164, | |
| "learning_rate": 2.638827302233428e-05, | |
| "loss": 1.2581, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 3.2526315789473683, | |
| "grad_norm": 0.37709353602618134, | |
| "learning_rate": 2.625016168790664e-05, | |
| "loss": 1.2533, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 3.2578947368421054, | |
| "grad_norm": 0.3270640411740736, | |
| "learning_rate": 2.611223599051198e-05, | |
| "loss": 1.2743, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 3.263157894736842, | |
| "grad_norm": 0.32059620385654264, | |
| "learning_rate": 2.597449779228983e-05, | |
| "loss": 1.2568, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 3.268421052631579, | |
| "grad_norm": 0.39808261047072035, | |
| "learning_rate": 2.5836948952848255e-05, | |
| "loss": 1.2525, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 3.2736842105263158, | |
| "grad_norm": 0.34113291757836145, | |
| "learning_rate": 2.5699591329238812e-05, | |
| "loss": 1.268, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 3.2789473684210524, | |
| "grad_norm": 0.3042065217936969, | |
| "learning_rate": 2.5562426775931418e-05, | |
| "loss": 1.2483, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 3.2842105263157895, | |
| "grad_norm": 0.3974087061640213, | |
| "learning_rate": 2.5425457144789364e-05, | |
| "loss": 1.2609, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 3.2894736842105265, | |
| "grad_norm": 0.321409927169932, | |
| "learning_rate": 2.5288684285044283e-05, | |
| "loss": 1.255, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 3.294736842105263, | |
| "grad_norm": 0.385869925356024, | |
| "learning_rate": 2.5152110043271166e-05, | |
| "loss": 1.2576, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "grad_norm": 0.3637124957498029, | |
| "learning_rate": 2.501573626336352e-05, | |
| "loss": 1.2411, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 3.305263157894737, | |
| "grad_norm": 0.3710385685313032, | |
| "learning_rate": 2.4879564786508343e-05, | |
| "loss": 1.2592, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 3.3105263157894735, | |
| "grad_norm": 0.4487560727745529, | |
| "learning_rate": 2.474359745116136e-05, | |
| "loss": 1.2404, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 3.3157894736842106, | |
| "grad_norm": 0.3231869771450256, | |
| "learning_rate": 2.460783609302218e-05, | |
| "loss": 1.2547, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 3.3210526315789473, | |
| "grad_norm": 0.4088431022056057, | |
| "learning_rate": 2.4472282545009493e-05, | |
| "loss": 1.2548, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 3.3263157894736843, | |
| "grad_norm": 0.29515450703495905, | |
| "learning_rate": 2.4336938637236352e-05, | |
| "loss": 1.2525, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 3.331578947368421, | |
| "grad_norm": 0.33297468568328076, | |
| "learning_rate": 2.4201806196985426e-05, | |
| "loss": 1.2737, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 3.336842105263158, | |
| "grad_norm": 0.3335294632136315, | |
| "learning_rate": 2.4066887048684394e-05, | |
| "loss": 1.2447, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 3.3421052631578947, | |
| "grad_norm": 0.2879112803644998, | |
| "learning_rate": 2.393218301388123e-05, | |
| "loss": 1.2715, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 3.3473684210526318, | |
| "grad_norm": 0.3133592323848536, | |
| "learning_rate": 2.3797695911219668e-05, | |
| "loss": 1.2561, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 3.3526315789473684, | |
| "grad_norm": 0.2430811283889928, | |
| "learning_rate": 2.3663427556414664e-05, | |
| "loss": 1.2601, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 3.3578947368421055, | |
| "grad_norm": 0.3579114126056535, | |
| "learning_rate": 2.352937976222781e-05, | |
| "loss": 1.253, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 3.363157894736842, | |
| "grad_norm": 0.26852343656836425, | |
| "learning_rate": 2.3395554338442908e-05, | |
| "loss": 1.245, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 3.3684210526315788, | |
| "grad_norm": 0.3011001164622397, | |
| "learning_rate": 2.3261953091841553e-05, | |
| "loss": 1.2546, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 3.373684210526316, | |
| "grad_norm": 0.26481840311987703, | |
| "learning_rate": 2.3128577826178723e-05, | |
| "loss": 1.2606, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 3.3789473684210525, | |
| "grad_norm": 0.3257272912007352, | |
| "learning_rate": 2.2995430342158365e-05, | |
| "loss": 1.2353, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 3.3842105263157896, | |
| "grad_norm": 0.38000488426555273, | |
| "learning_rate": 2.2862512437409162e-05, | |
| "loss": 1.2423, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 3.389473684210526, | |
| "grad_norm": 0.29866098174675637, | |
| "learning_rate": 2.272982590646029e-05, | |
| "loss": 1.2653, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 3.3947368421052633, | |
| "grad_norm": 0.7482169914063777, | |
| "learning_rate": 2.2597372540717083e-05, | |
| "loss": 1.2591, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "grad_norm": 0.48442639740243737, | |
| "learning_rate": 2.24651541284369e-05, | |
| "loss": 1.2748, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 3.405263157894737, | |
| "grad_norm": 0.3933443985045218, | |
| "learning_rate": 2.233317245470504e-05, | |
| "loss": 1.2491, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 3.4105263157894736, | |
| "grad_norm": 0.4653265340743596, | |
| "learning_rate": 2.220142930141054e-05, | |
| "loss": 1.2592, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 3.4157894736842107, | |
| "grad_norm": 0.42673076337011967, | |
| "learning_rate": 2.206992644722216e-05, | |
| "loss": 1.2396, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 3.4210526315789473, | |
| "grad_norm": 0.33876188196334395, | |
| "learning_rate": 2.1938665667564435e-05, | |
| "loss": 1.2436, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 3.4263157894736844, | |
| "grad_norm": 0.3630588058950603, | |
| "learning_rate": 2.1807648734593558e-05, | |
| "loss": 1.2557, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 3.431578947368421, | |
| "grad_norm": 0.3529402619316208, | |
| "learning_rate": 2.167687741717358e-05, | |
| "loss": 1.2536, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 3.4368421052631577, | |
| "grad_norm": 0.3145009910486473, | |
| "learning_rate": 2.1546353480852495e-05, | |
| "loss": 1.2465, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 3.442105263157895, | |
| "grad_norm": 0.2825566028878834, | |
| "learning_rate": 2.1416078687838403e-05, | |
| "loss": 1.2543, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 3.4473684210526314, | |
| "grad_norm": 0.2872680469582709, | |
| "learning_rate": 2.1286054796975696e-05, | |
| "loss": 1.2637, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 3.4526315789473685, | |
| "grad_norm": 0.2802498708050248, | |
| "learning_rate": 2.115628356372131e-05, | |
| "loss": 1.245, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 3.457894736842105, | |
| "grad_norm": 0.2779169417503312, | |
| "learning_rate": 2.1026766740121096e-05, | |
| "loss": 1.2548, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 3.463157894736842, | |
| "grad_norm": 0.27790502165031583, | |
| "learning_rate": 2.089750607478606e-05, | |
| "loss": 1.2482, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 3.468421052631579, | |
| "grad_norm": 0.3106234637273863, | |
| "learning_rate": 2.076850331286881e-05, | |
| "loss": 1.2474, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 3.473684210526316, | |
| "grad_norm": 0.2460612966298966, | |
| "learning_rate": 2.063976019604006e-05, | |
| "loss": 1.2578, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 3.4789473684210526, | |
| "grad_norm": 0.4002624603687612, | |
| "learning_rate": 2.0511278462464933e-05, | |
| "loss": 1.2323, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 3.4842105263157896, | |
| "grad_norm": 0.3558072656216221, | |
| "learning_rate": 2.038305984677969e-05, | |
| "loss": 1.2513, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 3.4894736842105263, | |
| "grad_norm": 0.32674276214626774, | |
| "learning_rate": 2.025510608006819e-05, | |
| "loss": 1.248, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 3.4947368421052634, | |
| "grad_norm": 0.3685362965399088, | |
| "learning_rate": 2.012741888983861e-05, | |
| "loss": 1.2612, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "grad_norm": 0.3851874388241183, | |
| "learning_rate": 2.0000000000000012e-05, | |
| "loss": 1.26, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 3.5052631578947366, | |
| "grad_norm": 0.2922093360847206, | |
| "learning_rate": 1.9872851130839126e-05, | |
| "loss": 1.2503, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 3.5105263157894737, | |
| "grad_norm": 0.2982128935849179, | |
| "learning_rate": 1.9745973998997177e-05, | |
| "loss": 1.2461, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 3.515789473684211, | |
| "grad_norm": 0.36881831273338744, | |
| "learning_rate": 1.9619370317446612e-05, | |
| "loss": 1.2627, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 3.5210526315789474, | |
| "grad_norm": 0.25559075127742553, | |
| "learning_rate": 1.9493041795468018e-05, | |
| "loss": 1.2474, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 3.526315789473684, | |
| "grad_norm": 0.6103223603779421, | |
| "learning_rate": 1.9366990138627054e-05, | |
| "loss": 1.2553, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 3.531578947368421, | |
| "grad_norm": 0.32053875904249984, | |
| "learning_rate": 1.9241217048751406e-05, | |
| "loss": 1.2716, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 3.536842105263158, | |
| "grad_norm": 0.32627511828160094, | |
| "learning_rate": 1.911572422390783e-05, | |
| "loss": 1.2509, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 3.542105263157895, | |
| "grad_norm": 0.31231339980121575, | |
| "learning_rate": 1.899051335837919e-05, | |
| "loss": 1.2542, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 3.5473684210526315, | |
| "grad_norm": 0.31642734990082777, | |
| "learning_rate": 1.886558614264165e-05, | |
| "loss": 1.2544, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 3.5526315789473686, | |
| "grad_norm": 0.41419322615420073, | |
| "learning_rate": 1.8740944263341773e-05, | |
| "loss": 1.2722, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 3.557894736842105, | |
| "grad_norm": 0.2575000448429207, | |
| "learning_rate": 1.8616589403273776e-05, | |
| "loss": 1.251, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 3.5631578947368423, | |
| "grad_norm": 0.45829370611833337, | |
| "learning_rate": 1.8492523241356877e-05, | |
| "loss": 1.2552, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 3.568421052631579, | |
| "grad_norm": 0.3876144668681015, | |
| "learning_rate": 1.8368747452612504e-05, | |
| "loss": 1.2756, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 3.5736842105263156, | |
| "grad_norm": 0.3605137220418223, | |
| "learning_rate": 1.8245263708141782e-05, | |
| "loss": 1.242, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 3.5789473684210527, | |
| "grad_norm": 0.3947355937612717, | |
| "learning_rate": 1.8122073675102935e-05, | |
| "loss": 1.2556, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 3.5842105263157897, | |
| "grad_norm": 0.29347916836402094, | |
| "learning_rate": 1.7999179016688763e-05, | |
| "loss": 1.26, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 3.5894736842105264, | |
| "grad_norm": 0.32495295214844105, | |
| "learning_rate": 1.7876581392104225e-05, | |
| "loss": 1.2496, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 3.594736842105263, | |
| "grad_norm": 0.2493724682619427, | |
| "learning_rate": 1.7754282456543977e-05, | |
| "loss": 1.2514, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "grad_norm": 0.35605401548647925, | |
| "learning_rate": 1.7632283861170135e-05, | |
| "loss": 1.2539, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 3.6052631578947367, | |
| "grad_norm": 0.2630345804072707, | |
| "learning_rate": 1.7510587253089842e-05, | |
| "loss": 1.2579, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 3.610526315789474, | |
| "grad_norm": 0.2772719177300871, | |
| "learning_rate": 1.7389194275333124e-05, | |
| "loss": 1.2471, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 3.6157894736842104, | |
| "grad_norm": 0.3256551364716347, | |
| "learning_rate": 1.7268106566830713e-05, | |
| "loss": 1.2562, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 3.6210526315789475, | |
| "grad_norm": 0.2942105351769792, | |
| "learning_rate": 1.7147325762391848e-05, | |
| "loss": 1.2664, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 3.626315789473684, | |
| "grad_norm": 0.29601761914650015, | |
| "learning_rate": 1.702685349268226e-05, | |
| "loss": 1.2559, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 3.6315789473684212, | |
| "grad_norm": 0.2759560921461832, | |
| "learning_rate": 1.690669138420215e-05, | |
| "loss": 1.2591, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 3.636842105263158, | |
| "grad_norm": 0.2440653651529168, | |
| "learning_rate": 1.6786841059264217e-05, | |
| "loss": 1.2574, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 3.6421052631578945, | |
| "grad_norm": 0.303898127022955, | |
| "learning_rate": 1.6667304135971756e-05, | |
| "loss": 1.2547, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 3.6473684210526316, | |
| "grad_norm": 0.2481861381786453, | |
| "learning_rate": 1.65480822281968e-05, | |
| "loss": 1.2488, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 3.6526315789473687, | |
| "grad_norm": 0.2565499348104272, | |
| "learning_rate": 1.6429176945558413e-05, | |
| "loss": 1.2561, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 3.6578947368421053, | |
| "grad_norm": 0.3224687182653659, | |
| "learning_rate": 1.6310589893400804e-05, | |
| "loss": 1.247, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 3.663157894736842, | |
| "grad_norm": 0.25279520055905946, | |
| "learning_rate": 1.6192322672771793e-05, | |
| "loss": 1.2636, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 3.668421052631579, | |
| "grad_norm": 0.3239078414093973, | |
| "learning_rate": 1.6074376880401147e-05, | |
| "loss": 1.2431, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 3.6736842105263157, | |
| "grad_norm": 0.25211963429157525, | |
| "learning_rate": 1.5956754108678996e-05, | |
| "loss": 1.2489, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 3.6789473684210527, | |
| "grad_norm": 0.3288796816421695, | |
| "learning_rate": 1.5839455945634372e-05, | |
| "loss": 1.2433, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 3.6842105263157894, | |
| "grad_norm": 0.2570823868070139, | |
| "learning_rate": 1.5722483974913737e-05, | |
| "loss": 1.2437, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 3.6894736842105265, | |
| "grad_norm": 0.23205884441696503, | |
| "learning_rate": 1.560583977575964e-05, | |
| "loss": 1.2558, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 3.694736842105263, | |
| "grad_norm": 0.254586157360098, | |
| "learning_rate": 1.5489524922989367e-05, | |
| "loss": 1.2677, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "grad_norm": 0.2528078741758432, | |
| "learning_rate": 1.537354098697367e-05, | |
| "loss": 1.2521, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 3.705263157894737, | |
| "grad_norm": 0.30438966904710707, | |
| "learning_rate": 1.525788953361563e-05, | |
| "loss": 1.2569, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 3.7105263157894735, | |
| "grad_norm": 0.24585215855274448, | |
| "learning_rate": 1.5142572124329418e-05, | |
| "loss": 1.2582, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 3.7157894736842105, | |
| "grad_norm": 0.23812179037555448, | |
| "learning_rate": 1.5027590316019276e-05, | |
| "loss": 1.2582, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 3.7210526315789476, | |
| "grad_norm": 0.2258598951803704, | |
| "learning_rate": 1.491294566105852e-05, | |
| "loss": 1.2398, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 3.7263157894736842, | |
| "grad_norm": 0.23820145432975506, | |
| "learning_rate": 1.4798639707268509e-05, | |
| "loss": 1.26, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 3.731578947368421, | |
| "grad_norm": 0.27098791758934043, | |
| "learning_rate": 1.4684673997897795e-05, | |
| "loss": 1.2467, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 3.736842105263158, | |
| "grad_norm": 0.1895081621315529, | |
| "learning_rate": 1.457105007160129e-05, | |
| "loss": 1.2469, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 3.7421052631578946, | |
| "grad_norm": 0.24431380487075854, | |
| "learning_rate": 1.4457769462419461e-05, | |
| "loss": 1.2505, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 3.7473684210526317, | |
| "grad_norm": 0.2598287894690381, | |
| "learning_rate": 1.4344833699757662e-05, | |
| "loss": 1.2733, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 3.7526315789473683, | |
| "grad_norm": 0.24173801356915325, | |
| "learning_rate": 1.4232244308365437e-05, | |
| "loss": 1.2515, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 3.7578947368421054, | |
| "grad_norm": 0.2744768545995936, | |
| "learning_rate": 1.4120002808315999e-05, | |
| "loss": 1.2446, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 3.763157894736842, | |
| "grad_norm": 0.29075680429359135, | |
| "learning_rate": 1.4008110714985623e-05, | |
| "loss": 1.2576, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 3.768421052631579, | |
| "grad_norm": 0.1679499052346039, | |
| "learning_rate": 1.3896569539033253e-05, | |
| "loss": 1.2434, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 3.7736842105263158, | |
| "grad_norm": 0.21354680460803685, | |
| "learning_rate": 1.3785380786380103e-05, | |
| "loss": 1.2642, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 3.7789473684210524, | |
| "grad_norm": 0.24355235079533985, | |
| "learning_rate": 1.367454595818928e-05, | |
| "loss": 1.2449, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 3.7842105263157895, | |
| "grad_norm": 0.17842505149174132, | |
| "learning_rate": 1.3564066550845558e-05, | |
| "loss": 1.2399, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 3.7894736842105265, | |
| "grad_norm": 0.2363958816949115, | |
| "learning_rate": 1.3453944055935151e-05, | |
| "loss": 1.2471, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 3.794736842105263, | |
| "grad_norm": 0.20243183669778259, | |
| "learning_rate": 1.3344179960225603e-05, | |
| "loss": 1.2535, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "grad_norm": 0.2471952644451058, | |
| "learning_rate": 1.3234775745645684e-05, | |
| "loss": 1.2484, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 3.805263157894737, | |
| "grad_norm": 0.21944384742054443, | |
| "learning_rate": 1.3125732889265393e-05, | |
| "loss": 1.2444, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 3.8105263157894735, | |
| "grad_norm": 0.22384339943654685, | |
| "learning_rate": 1.3017052863276054e-05, | |
| "loss": 1.2544, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 3.8157894736842106, | |
| "grad_norm": 0.20643300200194556, | |
| "learning_rate": 1.2908737134970367e-05, | |
| "loss": 1.2455, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 3.8210526315789473, | |
| "grad_norm": 0.22387663232782792, | |
| "learning_rate": 1.2800787166722634e-05, | |
| "loss": 1.2415, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 3.8263157894736843, | |
| "grad_norm": 0.23601246798864953, | |
| "learning_rate": 1.2693204415969068e-05, | |
| "loss": 1.2488, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 3.831578947368421, | |
| "grad_norm": 0.21781387567237637, | |
| "learning_rate": 1.2585990335188014e-05, | |
| "loss": 1.2346, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 3.836842105263158, | |
| "grad_norm": 0.20954968812529903, | |
| "learning_rate": 1.2479146371880408e-05, | |
| "loss": 1.25, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 3.8421052631578947, | |
| "grad_norm": 0.2935272323831709, | |
| "learning_rate": 1.2372673968550229e-05, | |
| "loss": 1.2575, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 3.8473684210526313, | |
| "grad_norm": 0.23428135792560334, | |
| "learning_rate": 1.2266574562684994e-05, | |
| "loss": 1.2477, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 3.8526315789473684, | |
| "grad_norm": 0.18658016102303704, | |
| "learning_rate": 1.2160849586736375e-05, | |
| "loss": 1.256, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 3.8578947368421055, | |
| "grad_norm": 0.23105098493810466, | |
| "learning_rate": 1.2055500468100849e-05, | |
| "loss": 1.2399, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 3.863157894736842, | |
| "grad_norm": 0.1929616859489707, | |
| "learning_rate": 1.1950528629100457e-05, | |
| "loss": 1.2515, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 3.8684210526315788, | |
| "grad_norm": 0.218750003790284, | |
| "learning_rate": 1.1845935486963546e-05, | |
| "loss": 1.2489, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 3.873684210526316, | |
| "grad_norm": 0.19977098349774547, | |
| "learning_rate": 1.1741722453805657e-05, | |
| "loss": 1.2449, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 3.8789473684210525, | |
| "grad_norm": 0.23507506446012338, | |
| "learning_rate": 1.163789093661051e-05, | |
| "loss": 1.2562, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 3.8842105263157896, | |
| "grad_norm": 0.19034197687876206, | |
| "learning_rate": 1.1534442337210919e-05, | |
| "loss": 1.2528, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 3.889473684210526, | |
| "grad_norm": 0.25267159420496116, | |
| "learning_rate": 1.1431378052269934e-05, | |
| "loss": 1.2571, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 3.8947368421052633, | |
| "grad_norm": 0.21369948030483346, | |
| "learning_rate": 1.1328699473261957e-05, | |
| "loss": 1.241, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "grad_norm": 0.23307740618119258, | |
| "learning_rate": 1.1226407986453963e-05, | |
| "loss": 1.2557, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 3.905263157894737, | |
| "grad_norm": 0.19115969783367653, | |
| "learning_rate": 1.1124504972886782e-05, | |
| "loss": 1.2525, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 3.9105263157894736, | |
| "grad_norm": 0.2681117091243346, | |
| "learning_rate": 1.1022991808356442e-05, | |
| "loss": 1.248, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 3.9157894736842103, | |
| "grad_norm": 0.1651284103554666, | |
| "learning_rate": 1.0921869863395642e-05, | |
| "loss": 1.242, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 3.9210526315789473, | |
| "grad_norm": 0.24161510420189317, | |
| "learning_rate": 1.0821140503255174e-05, | |
| "loss": 1.2555, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 3.9263157894736844, | |
| "grad_norm": 0.20280135248319278, | |
| "learning_rate": 1.0720805087885533e-05, | |
| "loss": 1.2578, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 3.931578947368421, | |
| "grad_norm": 0.3284807144434915, | |
| "learning_rate": 1.0620864971918579e-05, | |
| "loss": 1.259, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 3.9368421052631577, | |
| "grad_norm": 0.22538990852777954, | |
| "learning_rate": 1.05213215046492e-05, | |
| "loss": 1.2597, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 3.942105263157895, | |
| "grad_norm": 0.19055951323654136, | |
| "learning_rate": 1.0422176030017117e-05, | |
| "loss": 1.2443, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 3.9473684210526314, | |
| "grad_norm": 0.18646041833135787, | |
| "learning_rate": 1.0323429886588743e-05, | |
| "loss": 1.2388, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 3.9526315789473685, | |
| "grad_norm": 0.19285379546461523, | |
| "learning_rate": 1.0225084407539109e-05, | |
| "loss": 1.2335, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 3.957894736842105, | |
| "grad_norm": 0.1997818414436052, | |
| "learning_rate": 1.0127140920633857e-05, | |
| "loss": 1.2439, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 3.963157894736842, | |
| "grad_norm": 0.20149581036707856, | |
| "learning_rate": 1.0029600748211314e-05, | |
| "loss": 1.2415, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 3.968421052631579, | |
| "grad_norm": 0.19260248911961064, | |
| "learning_rate": 9.932465207164675e-06, | |
| "loss": 1.2633, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 3.973684210526316, | |
| "grad_norm": 0.21099578591151794, | |
| "learning_rate": 9.835735608924155e-06, | |
| "loss": 1.231, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 3.9789473684210526, | |
| "grad_norm": 0.17132739675169845, | |
| "learning_rate": 9.739413259439337e-06, | |
| "loss": 1.2451, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 3.984210526315789, | |
| "grad_norm": 0.21904215059223633, | |
| "learning_rate": 9.643499459161538e-06, | |
| "loss": 1.2523, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 3.9894736842105263, | |
| "grad_norm": 0.224551193557602, | |
| "learning_rate": 9.547995503026217e-06, | |
| "loss": 1.2478, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 3.9947368421052634, | |
| "grad_norm": 0.19238609932248696, | |
| "learning_rate": 9.452902680435527e-06, | |
| "loss": 1.249, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.22055223309269914, | |
| "learning_rate": 9.358222275240884e-06, | |
| "loss": 1.2167, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 4.005263157894737, | |
| "grad_norm": 0.20140128214778716, | |
| "learning_rate": 9.263955565725648e-06, | |
| "loss": 1.2391, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 4.010526315789473, | |
| "grad_norm": 0.2068373210972995, | |
| "learning_rate": 9.170103824587855e-06, | |
| "loss": 1.2331, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 4.015789473684211, | |
| "grad_norm": 0.18232115316386402, | |
| "learning_rate": 9.07666831892304e-06, | |
| "loss": 1.2121, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 4.021052631578947, | |
| "grad_norm": 0.2188152260857773, | |
| "learning_rate": 8.983650310207142e-06, | |
| "loss": 1.2232, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 4.026315789473684, | |
| "grad_norm": 0.1880274936269495, | |
| "learning_rate": 8.89105105427945e-06, | |
| "loss": 1.2272, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 4.031578947368421, | |
| "grad_norm": 0.17030491611623289, | |
| "learning_rate": 8.798871801325632e-06, | |
| "loss": 1.2284, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 4.036842105263158, | |
| "grad_norm": 0.1887119280020856, | |
| "learning_rate": 8.707113795860938e-06, | |
| "loss": 1.2364, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 4.042105263157895, | |
| "grad_norm": 0.18907111180220373, | |
| "learning_rate": 8.615778276713293e-06, | |
| "loss": 1.2277, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 4.0473684210526315, | |
| "grad_norm": 0.17028701794910334, | |
| "learning_rate": 8.524866477006637e-06, | |
| "loss": 1.2268, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 4.052631578947368, | |
| "grad_norm": 0.1927239082270522, | |
| "learning_rate": 8.434379624144261e-06, | |
| "loss": 1.2202, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 4.057894736842106, | |
| "grad_norm": 0.18231681740661396, | |
| "learning_rate": 8.344318939792232e-06, | |
| "loss": 1.2103, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 4.063157894736842, | |
| "grad_norm": 0.2108141165888399, | |
| "learning_rate": 8.254685639862896e-06, | |
| "loss": 1.2289, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 4.068421052631579, | |
| "grad_norm": 0.21501105777435195, | |
| "learning_rate": 8.165480934498462e-06, | |
| "loss": 1.2304, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 4.073684210526316, | |
| "grad_norm": 0.22014095135466175, | |
| "learning_rate": 8.076706028054709e-06, | |
| "loss": 1.2395, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 4.078947368421052, | |
| "grad_norm": 0.18281510166398557, | |
| "learning_rate": 7.988362119084642e-06, | |
| "loss": 1.232, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 4.08421052631579, | |
| "grad_norm": 0.21712131045816194, | |
| "learning_rate": 7.90045040032236e-06, | |
| "loss": 1.2423, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 4.089473684210526, | |
| "grad_norm": 0.19226805462323326, | |
| "learning_rate": 7.812972058666974e-06, | |
| "loss": 1.2295, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 4.094736842105263, | |
| "grad_norm": 0.175015352113717, | |
| "learning_rate": 7.725928275166534e-06, | |
| "loss": 1.2282, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "grad_norm": 0.2095750364202842, | |
| "learning_rate": 7.639320225002106e-06, | |
| "loss": 1.2244, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 4.105263157894737, | |
| "grad_norm": 0.19644672306841843, | |
| "learning_rate": 7.553149077471915e-06, | |
| "loss": 1.2217, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 4.110526315789474, | |
| "grad_norm": 0.2000635414888708, | |
| "learning_rate": 7.46741599597554e-06, | |
| "loss": 1.2319, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 4.11578947368421, | |
| "grad_norm": 0.1746543551783459, | |
| "learning_rate": 7.382122137998209e-06, | |
| "loss": 1.2282, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 4.121052631578947, | |
| "grad_norm": 0.17481980918717463, | |
| "learning_rate": 7.297268655095213e-06, | |
| "loss": 1.2395, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 4.126315789473685, | |
| "grad_norm": 0.17610089627569894, | |
| "learning_rate": 7.212856692876289e-06, | |
| "loss": 1.2319, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 4.131578947368421, | |
| "grad_norm": 0.17566117386443802, | |
| "learning_rate": 7.128887390990198e-06, | |
| "loss": 1.2245, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 4.136842105263158, | |
| "grad_norm": 0.18888285977402394, | |
| "learning_rate": 7.045361883109318e-06, | |
| "loss": 1.2363, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 4.1421052631578945, | |
| "grad_norm": 0.1679963465599155, | |
| "learning_rate": 6.962281296914386e-06, | |
| "loss": 1.2319, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 4.147368421052631, | |
| "grad_norm": 0.17232128719198106, | |
| "learning_rate": 6.8796467540791986e-06, | |
| "loss": 1.2312, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 4.152631578947369, | |
| "grad_norm": 0.19685528274227304, | |
| "learning_rate": 6.797459370255519e-06, | |
| "loss": 1.2324, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 4.157894736842105, | |
| "grad_norm": 0.1583456150516079, | |
| "learning_rate": 6.715720255058e-06, | |
| "loss": 1.24, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 4.163157894736842, | |
| "grad_norm": 0.172328795648275, | |
| "learning_rate": 6.634430512049213e-06, | |
| "loss": 1.2513, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 4.168421052631579, | |
| "grad_norm": 0.16257107292586506, | |
| "learning_rate": 6.553591238724712e-06, | |
| "loss": 1.2275, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 4.173684210526316, | |
| "grad_norm": 0.14389724088218966, | |
| "learning_rate": 6.4732035264982904e-06, | |
| "loss": 1.2348, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 4.178947368421053, | |
| "grad_norm": 0.15689066100797078, | |
| "learning_rate": 6.39326846068717e-06, | |
| "loss": 1.2179, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 4.184210526315789, | |
| "grad_norm": 0.18533318047509703, | |
| "learning_rate": 6.313787120497376e-06, | |
| "loss": 1.236, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 4.189473684210526, | |
| "grad_norm": 0.1459277700590749, | |
| "learning_rate": 6.234760579009167e-06, | |
| "loss": 1.2435, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 4.1947368421052635, | |
| "grad_norm": 0.155103015306397, | |
| "learning_rate": 6.1561899031625794e-06, | |
| "loss": 1.2282, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "grad_norm": 0.1477347804716696, | |
| "learning_rate": 6.078076153742962e-06, | |
| "loss": 1.2249, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 4.205263157894737, | |
| "grad_norm": 0.15276423763995617, | |
| "learning_rate": 6.000420385366687e-06, | |
| "loss": 1.2297, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 4.2105263157894735, | |
| "grad_norm": 0.15126290143221918, | |
| "learning_rate": 5.923223646466923e-06, | |
| "loss": 1.2387, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 4.215789473684211, | |
| "grad_norm": 0.15492928616201465, | |
| "learning_rate": 5.846486979279449e-06, | |
| "loss": 1.2367, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 4.221052631578948, | |
| "grad_norm": 0.17188412280549703, | |
| "learning_rate": 5.770211419828604e-06, | |
| "loss": 1.2322, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 4.226315789473684, | |
| "grad_norm": 0.15097184444197026, | |
| "learning_rate": 5.694397997913319e-06, | |
| "loss": 1.2321, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 4.231578947368421, | |
| "grad_norm": 0.1453328152503722, | |
| "learning_rate": 5.619047737093164e-06, | |
| "loss": 1.2384, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 4.2368421052631575, | |
| "grad_norm": 0.18220366542871314, | |
| "learning_rate": 5.5441616546745646e-06, | |
| "loss": 1.2383, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 4.242105263157895, | |
| "grad_norm": 0.167450785630923, | |
| "learning_rate": 5.469740761697044e-06, | |
| "loss": 1.2426, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 4.247368421052632, | |
| "grad_norm": 0.14931148609570408, | |
| "learning_rate": 5.395786062919622e-06, | |
| "loss": 1.2333, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 4.252631578947368, | |
| "grad_norm": 0.16803901696022852, | |
| "learning_rate": 5.322298556807179e-06, | |
| "loss": 1.2417, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 4.257894736842105, | |
| "grad_norm": 0.16226281008273294, | |
| "learning_rate": 5.249279235517031e-06, | |
| "loss": 1.2329, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 4.2631578947368425, | |
| "grad_norm": 0.13999210516589425, | |
| "learning_rate": 5.176729084885508e-06, | |
| "loss": 1.2412, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 4.268421052631579, | |
| "grad_norm": 0.18759320208384703, | |
| "learning_rate": 5.10464908441465e-06, | |
| "loss": 1.2357, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 4.273684210526316, | |
| "grad_norm": 0.17340200354001228, | |
| "learning_rate": 5.033040207258979e-06, | |
| "loss": 1.2271, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 4.278947368421052, | |
| "grad_norm": 0.15950936103038027, | |
| "learning_rate": 4.9619034202123884e-06, | |
| "loss": 1.2151, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 4.284210526315789, | |
| "grad_norm": 0.15140529416594803, | |
| "learning_rate": 4.891239683695044e-06, | |
| "loss": 1.232, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 4.2894736842105265, | |
| "grad_norm": 0.13998854140642578, | |
| "learning_rate": 4.821049951740442e-06, | |
| "loss": 1.2255, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 4.294736842105263, | |
| "grad_norm": 0.1499375273785916, | |
| "learning_rate": 4.751335171982527e-06, | |
| "loss": 1.2314, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "grad_norm": 0.158191412279702, | |
| "learning_rate": 4.6820962856429205e-06, | |
| "loss": 1.234, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 4.3052631578947365, | |
| "grad_norm": 0.14057999268017907, | |
| "learning_rate": 4.613334227518165e-06, | |
| "loss": 1.2427, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 4.310526315789474, | |
| "grad_norm": 0.1433236145981049, | |
| "learning_rate": 4.545049925967137e-06, | |
| "loss": 1.2313, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 4.315789473684211, | |
| "grad_norm": 0.1303171009707853, | |
| "learning_rate": 4.4772443028985004e-06, | |
| "loss": 1.2297, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 4.321052631578947, | |
| "grad_norm": 0.15617718436585057, | |
| "learning_rate": 4.409918273758278e-06, | |
| "loss": 1.2412, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 4.326315789473684, | |
| "grad_norm": 0.1476554630303936, | |
| "learning_rate": 4.343072747517459e-06, | |
| "loss": 1.2387, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 4.331578947368421, | |
| "grad_norm": 0.1362192280798835, | |
| "learning_rate": 4.276708626659778e-06, | |
| "loss": 1.2349, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 4.336842105263158, | |
| "grad_norm": 0.15051183831923126, | |
| "learning_rate": 4.2108268071694616e-06, | |
| "loss": 1.2122, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 4.342105263157895, | |
| "grad_norm": 0.1445207500529269, | |
| "learning_rate": 4.1454281785191995e-06, | |
| "loss": 1.2224, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 4.347368421052631, | |
| "grad_norm": 0.14362316701732558, | |
| "learning_rate": 4.080513623658075e-06, | |
| "loss": 1.2186, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 4.352631578947369, | |
| "grad_norm": 0.1479016471804495, | |
| "learning_rate": 4.0160840189997155e-06, | |
| "loss": 1.2324, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 4.3578947368421055, | |
| "grad_norm": 0.1397296336400901, | |
| "learning_rate": 3.952140234410396e-06, | |
| "loss": 1.2309, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 4.363157894736842, | |
| "grad_norm": 0.12213103880797943, | |
| "learning_rate": 3.888683133197293e-06, | |
| "loss": 1.2231, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 4.368421052631579, | |
| "grad_norm": 0.13169031113809618, | |
| "learning_rate": 3.825713572096903e-06, | |
| "loss": 1.2264, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 4.373684210526315, | |
| "grad_norm": 0.1415624842501799, | |
| "learning_rate": 3.7632324012633992e-06, | |
| "loss": 1.2444, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 4.378947368421053, | |
| "grad_norm": 0.14671744800493622, | |
| "learning_rate": 3.701240464257181e-06, | |
| "loss": 1.2183, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 4.38421052631579, | |
| "grad_norm": 0.13323253635868224, | |
| "learning_rate": 3.6397385980335e-06, | |
| "loss": 1.2156, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 4.389473684210526, | |
| "grad_norm": 0.13127420581089705, | |
| "learning_rate": 3.5787276329311315e-06, | |
| "loss": 1.2231, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 4.394736842105263, | |
| "grad_norm": 0.133896287855281, | |
| "learning_rate": 3.518208392661184e-06, | |
| "loss": 1.2293, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "grad_norm": 0.13026051571456285, | |
| "learning_rate": 3.458181694295961e-06, | |
| "loss": 1.2395, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 4.405263157894737, | |
| "grad_norm": 0.15010438499441964, | |
| "learning_rate": 3.398648348257969e-06, | |
| "loss": 1.2323, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 4.410526315789474, | |
| "grad_norm": 0.1501949658490909, | |
| "learning_rate": 3.3396091583089275e-06, | |
| "loss": 1.2186, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 4.41578947368421, | |
| "grad_norm": 0.1433763217867749, | |
| "learning_rate": 3.281064921538919e-06, | |
| "loss": 1.2379, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 4.421052631578947, | |
| "grad_norm": 0.1323412042853926, | |
| "learning_rate": 3.2230164283556918e-06, | |
| "loss": 1.2231, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 4.426315789473684, | |
| "grad_norm": 0.13635885589343097, | |
| "learning_rate": 3.1654644624739082e-06, | |
| "loss": 1.2297, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 4.431578947368421, | |
| "grad_norm": 0.1323736124785357, | |
| "learning_rate": 3.1084098009046106e-06, | |
| "loss": 1.235, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 4.436842105263158, | |
| "grad_norm": 0.13102236402292022, | |
| "learning_rate": 3.0518532139447267e-06, | |
| "loss": 1.2307, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 4.442105263157894, | |
| "grad_norm": 0.15428668766385725, | |
| "learning_rate": 2.995795465166644e-06, | |
| "loss": 1.226, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 4.447368421052632, | |
| "grad_norm": 0.13778512976226498, | |
| "learning_rate": 2.9402373114079295e-06, | |
| "loss": 1.2276, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 4.4526315789473685, | |
| "grad_norm": 0.13474950636883365, | |
| "learning_rate": 2.8851795027610997e-06, | |
| "loss": 1.2228, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 4.457894736842105, | |
| "grad_norm": 0.1353883744809194, | |
| "learning_rate": 2.83062278256351e-06, | |
| "loss": 1.2339, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 4.463157894736842, | |
| "grad_norm": 0.13137189130014673, | |
| "learning_rate": 2.776567887387267e-06, | |
| "loss": 1.2301, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 4.468421052631579, | |
| "grad_norm": 0.13126591401950521, | |
| "learning_rate": 2.723015547029344e-06, | |
| "loss": 1.2468, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 4.473684210526316, | |
| "grad_norm": 0.1415673262181535, | |
| "learning_rate": 2.669966484501716e-06, | |
| "loss": 1.2245, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 4.478947368421053, | |
| "grad_norm": 0.1320404723499411, | |
| "learning_rate": 2.6174214160215704e-06, | |
| "loss": 1.2352, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 4.484210526315789, | |
| "grad_norm": 0.12633771710003897, | |
| "learning_rate": 2.5653810510016454e-06, | |
| "loss": 1.2339, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 4.489473684210527, | |
| "grad_norm": 0.12316620532344269, | |
| "learning_rate": 2.5138460920406884e-06, | |
| "loss": 1.2317, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 4.494736842105263, | |
| "grad_norm": 0.13602160694846396, | |
| "learning_rate": 2.462817234913919e-06, | |
| "loss": 1.2273, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "grad_norm": 0.1415982613618782, | |
| "learning_rate": 2.4122951685636674e-06, | |
| "loss": 1.2243, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 4.505263157894737, | |
| "grad_norm": 0.3127501030193754, | |
| "learning_rate": 2.3622805750900567e-06, | |
| "loss": 1.2222, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 4.510526315789473, | |
| "grad_norm": 0.13107864455151064, | |
| "learning_rate": 2.3127741297418283e-06, | |
| "loss": 1.2366, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 4.515789473684211, | |
| "grad_norm": 0.13657403397982118, | |
| "learning_rate": 2.2637765009071576e-06, | |
| "loss": 1.2337, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 4.521052631578947, | |
| "grad_norm": 0.1302788270408855, | |
| "learning_rate": 2.215288350104694e-06, | |
| "loss": 1.2253, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 4.526315789473684, | |
| "grad_norm": 0.12664051031696197, | |
| "learning_rate": 2.1673103319746146e-06, | |
| "loss": 1.225, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 4.531578947368421, | |
| "grad_norm": 0.14352455601262662, | |
| "learning_rate": 2.1198430942697625e-06, | |
| "loss": 1.2251, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 4.536842105263158, | |
| "grad_norm": 0.13649018750914618, | |
| "learning_rate": 2.0728872778469224e-06, | |
| "loss": 1.2407, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 4.542105263157895, | |
| "grad_norm": 0.20719895993192947, | |
| "learning_rate": 2.026443516658163e-06, | |
| "loss": 1.2272, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 4.5473684210526315, | |
| "grad_norm": 0.13901759037964037, | |
| "learning_rate": 1.9805124377422834e-06, | |
| "loss": 1.2368, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 4.552631578947368, | |
| "grad_norm": 0.1307517829866866, | |
| "learning_rate": 1.93509466121633e-06, | |
| "loss": 1.2318, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 4.557894736842105, | |
| "grad_norm": 0.12095060465165465, | |
| "learning_rate": 1.8901908002672442e-06, | |
| "loss": 1.2359, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 4.563157894736842, | |
| "grad_norm": 0.11915812322895941, | |
| "learning_rate": 1.8458014611435705e-06, | |
| "loss": 1.2426, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 4.568421052631579, | |
| "grad_norm": 0.1240067541225263, | |
| "learning_rate": 1.80192724314729e-06, | |
| "loss": 1.2163, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 4.573684210526316, | |
| "grad_norm": 0.12397138277266245, | |
| "learning_rate": 1.7585687386256944e-06, | |
| "loss": 1.2428, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 4.578947368421053, | |
| "grad_norm": 0.13123863782173215, | |
| "learning_rate": 1.7157265329634354e-06, | |
| "loss": 1.2413, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 4.58421052631579, | |
| "grad_norm": 0.13114076151140514, | |
| "learning_rate": 1.6734012045745762e-06, | |
| "loss": 1.2255, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 4.589473684210526, | |
| "grad_norm": 0.11880095160971275, | |
| "learning_rate": 1.6315933248948068e-06, | |
| "loss": 1.2325, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 4.594736842105263, | |
| "grad_norm": 0.133304713863376, | |
| "learning_rate": 1.5903034583737343e-06, | |
| "loss": 1.2406, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "grad_norm": 0.12445121300617833, | |
| "learning_rate": 1.5495321624672443e-06, | |
| "loss": 1.2323, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 4.605263157894737, | |
| "grad_norm": 0.11989093911414492, | |
| "learning_rate": 1.5092799876299835e-06, | |
| "loss": 1.2152, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 4.610526315789474, | |
| "grad_norm": 0.12318779958969978, | |
| "learning_rate": 1.4695474773079287e-06, | |
| "loss": 1.2274, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 4.61578947368421, | |
| "grad_norm": 0.11755163812164948, | |
| "learning_rate": 1.4303351679310473e-06, | |
| "loss": 1.2323, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 4.621052631578947, | |
| "grad_norm": 0.1271239652597123, | |
| "learning_rate": 1.3916435889060575e-06, | |
| "loss": 1.2281, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 4.626315789473685, | |
| "grad_norm": 0.12826240424195157, | |
| "learning_rate": 1.353473262609275e-06, | |
| "loss": 1.2273, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 4.631578947368421, | |
| "grad_norm": 0.12460164768857226, | |
| "learning_rate": 1.3158247043795735e-06, | |
| "loss": 1.2264, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 4.636842105263158, | |
| "grad_norm": 0.11779769435490604, | |
| "learning_rate": 1.278698422511413e-06, | |
| "loss": 1.2243, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 4.6421052631578945, | |
| "grad_norm": 0.11403097697307746, | |
| "learning_rate": 1.242094918247978e-06, | |
| "loss": 1.2283, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 4.647368421052631, | |
| "grad_norm": 0.12118016084867007, | |
| "learning_rate": 1.2060146857744282e-06, | |
| "loss": 1.2392, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 4.652631578947369, | |
| "grad_norm": 0.12319740930061163, | |
| "learning_rate": 1.1704582122112008e-06, | |
| "loss": 1.2088, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 4.657894736842105, | |
| "grad_norm": 0.11386564708274247, | |
| "learning_rate": 1.1354259776074472e-06, | |
| "loss": 1.233, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 4.663157894736842, | |
| "grad_norm": 0.11374999316034942, | |
| "learning_rate": 1.1009184549345632e-06, | |
| "loss": 1.2386, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 4.668421052631579, | |
| "grad_norm": 0.12522042587937965, | |
| "learning_rate": 1.0669361100797704e-06, | |
| "loss": 1.2418, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 4.673684210526316, | |
| "grad_norm": 0.11429258921626788, | |
| "learning_rate": 1.0334794018398652e-06, | |
| "loss": 1.2178, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 4.678947368421053, | |
| "grad_norm": 0.34812757148076545, | |
| "learning_rate": 1.0005487819149917e-06, | |
| "loss": 1.2272, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 4.684210526315789, | |
| "grad_norm": 0.1182967297844485, | |
| "learning_rate": 9.681446949025752e-07, | |
| "loss": 1.2191, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 4.689473684210526, | |
| "grad_norm": 0.1272033760667648, | |
| "learning_rate": 9.362675782912923e-07, | |
| "loss": 1.2356, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 4.6947368421052635, | |
| "grad_norm": 0.12672455306432165, | |
| "learning_rate": 9.049178624551635e-07, | |
| "loss": 1.2285, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "grad_norm": 0.3617879606840202, | |
| "learning_rate": 8.740959706477725e-07, | |
| "loss": 1.2656, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 4.705263157894737, | |
| "grad_norm": 0.10997692184574041, | |
| "learning_rate": 8.438023189965272e-07, | |
| "loss": 1.2358, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 4.7105263157894735, | |
| "grad_norm": 0.12136967224479166, | |
| "learning_rate": 8.140373164970428e-07, | |
| "loss": 1.2146, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 4.715789473684211, | |
| "grad_norm": 0.2009841140710602, | |
| "learning_rate": 7.848013650076258e-07, | |
| "loss": 1.2284, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 4.721052631578948, | |
| "grad_norm": 0.11466884057407387, | |
| "learning_rate": 7.560948592438521e-07, | |
| "loss": 1.241, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 4.726315789473684, | |
| "grad_norm": 0.11496880440793267, | |
| "learning_rate": 7.279181867732199e-07, | |
| "loss": 1.2151, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 4.731578947368421, | |
| "grad_norm": 0.12172797181082162, | |
| "learning_rate": 7.002717280099403e-07, | |
| "loss": 1.2227, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 4.7368421052631575, | |
| "grad_norm": 0.12443319588453902, | |
| "learning_rate": 6.731558562097995e-07, | |
| "loss": 1.2329, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 4.742105263157895, | |
| "grad_norm": 0.12280105376114048, | |
| "learning_rate": 6.465709374650964e-07, | |
| "loss": 1.2343, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 4.747368421052632, | |
| "grad_norm": 0.11762303018802715, | |
| "learning_rate": 6.205173306997125e-07, | |
| "loss": 1.2267, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 4.752631578947368, | |
| "grad_norm": 0.11816128091190285, | |
| "learning_rate": 5.949953876642855e-07, | |
| "loss": 1.2293, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 4.757894736842105, | |
| "grad_norm": 0.1156560558591028, | |
| "learning_rate": 5.700054529314347e-07, | |
| "loss": 1.2315, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 4.7631578947368425, | |
| "grad_norm": 0.11137701754866611, | |
| "learning_rate": 5.455478638911071e-07, | |
| "loss": 1.2394, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 4.768421052631579, | |
| "grad_norm": 0.11181750038905715, | |
| "learning_rate": 5.216229507460435e-07, | |
| "loss": 1.2208, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 4.773684210526316, | |
| "grad_norm": 0.12036980268449626, | |
| "learning_rate": 4.982310365073107e-07, | |
| "loss": 1.2235, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 4.778947368421052, | |
| "grad_norm": 0.12359942605818125, | |
| "learning_rate": 4.75372436989936e-07, | |
| "loss": 1.2308, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 4.784210526315789, | |
| "grad_norm": 0.13220645490519645, | |
| "learning_rate": 4.530474608086355e-07, | |
| "loss": 1.214, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 4.7894736842105265, | |
| "grad_norm": 0.12206816510347139, | |
| "learning_rate": 4.3125640937368373e-07, | |
| "loss": 1.2194, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 4.794736842105263, | |
| "grad_norm": 0.11617994515280962, | |
| "learning_rate": 4.0999957688679706e-07, | |
| "loss": 1.2241, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "grad_norm": 0.1148058679734953, | |
| "learning_rate": 3.8927725033718553e-07, | |
| "loss": 1.2223, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 4.8052631578947365, | |
| "grad_norm": 0.11823614340464102, | |
| "learning_rate": 3.690897094976942e-07, | |
| "loss": 1.2238, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 4.810526315789474, | |
| "grad_norm": 0.11790591732140702, | |
| "learning_rate": 3.4943722692099224e-07, | |
| "loss": 1.2153, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 4.815789473684211, | |
| "grad_norm": 0.11877977952867706, | |
| "learning_rate": 3.3032006793590977e-07, | |
| "loss": 1.2334, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 4.821052631578947, | |
| "grad_norm": 0.12246828468344964, | |
| "learning_rate": 3.117384906438581e-07, | |
| "loss": 1.2386, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 4.826315789473684, | |
| "grad_norm": 0.10958575864964563, | |
| "learning_rate": 2.936927459153438e-07, | |
| "loss": 1.2392, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 4.831578947368421, | |
| "grad_norm": 0.11159223936915229, | |
| "learning_rate": 2.761830773865759e-07, | |
| "loss": 1.225, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 4.836842105263158, | |
| "grad_norm": 0.11067027350647266, | |
| "learning_rate": 2.5920972145618394e-07, | |
| "loss": 1.2182, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 4.842105263157895, | |
| "grad_norm": 0.11845597460367807, | |
| "learning_rate": 2.4277290728202063e-07, | |
| "loss": 1.2303, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 4.847368421052631, | |
| "grad_norm": 0.11321338292881286, | |
| "learning_rate": 2.2687285677807536e-07, | |
| "loss": 1.2286, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 4.852631578947369, | |
| "grad_norm": 0.10918511827532087, | |
| "learning_rate": 2.1150978461146332e-07, | |
| "loss": 1.2303, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 4.8578947368421055, | |
| "grad_norm": 0.11331613848290951, | |
| "learning_rate": 1.9668389819954338e-07, | |
| "loss": 1.2238, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 4.863157894736842, | |
| "grad_norm": 0.11227809077874316, | |
| "learning_rate": 1.8239539770711133e-07, | |
| "loss": 1.229, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 4.868421052631579, | |
| "grad_norm": 0.11188390803054302, | |
| "learning_rate": 1.6864447604370004e-07, | |
| "loss": 1.2315, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 4.873684210526315, | |
| "grad_norm": 0.1126954574845899, | |
| "learning_rate": 1.5543131886096352e-07, | |
| "loss": 1.2281, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 4.878947368421053, | |
| "grad_norm": 0.1154664007961282, | |
| "learning_rate": 1.427561045501902e-07, | |
| "loss": 1.2372, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 4.88421052631579, | |
| "grad_norm": 0.11176844779105831, | |
| "learning_rate": 1.3061900423986917e-07, | |
| "loss": 1.2268, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 4.889473684210526, | |
| "grad_norm": 0.11214271981901136, | |
| "learning_rate": 1.1902018179340779e-07, | |
| "loss": 1.2211, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 4.894736842105263, | |
| "grad_norm": 0.11806437367689042, | |
| "learning_rate": 1.0795979380690657e-07, | |
| "loss": 1.2232, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "grad_norm": 0.12131946872074126, | |
| "learning_rate": 9.74379896070321e-08, | |
| "loss": 1.2392, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 4.905263157894737, | |
| "grad_norm": 0.11758661501722971, | |
| "learning_rate": 8.745491124901861e-08, | |
| "loss": 1.2215, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 4.910526315789474, | |
| "grad_norm": 0.10980377112088192, | |
| "learning_rate": 7.80106935147451e-08, | |
| "loss": 1.2412, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 4.91578947368421, | |
| "grad_norm": 0.11037951117364361, | |
| "learning_rate": 6.910546391092343e-08, | |
| "loss": 1.2198, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 4.921052631578947, | |
| "grad_norm": 0.11711041285423687, | |
| "learning_rate": 6.073934266735303e-08, | |
| "loss": 1.2256, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 4.926315789473684, | |
| "grad_norm": 0.11213122469542446, | |
| "learning_rate": 5.291244273531782e-08, | |
| "loss": 1.2389, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 4.931578947368421, | |
| "grad_norm": 0.11158223482551854, | |
| "learning_rate": 4.562486978606728e-08, | |
| "loss": 1.2358, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 4.936842105263158, | |
| "grad_norm": 0.10743288484662021, | |
| "learning_rate": 3.887672220936445e-08, | |
| "loss": 1.2142, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 4.942105263157895, | |
| "grad_norm": 0.11480044753648233, | |
| "learning_rate": 3.266809111218017e-08, | |
| "loss": 1.2304, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 4.947368421052632, | |
| "grad_norm": 0.12600905075056, | |
| "learning_rate": 2.699906031745414e-08, | |
| "loss": 1.2348, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 4.9526315789473685, | |
| "grad_norm": 0.10693815172707843, | |
| "learning_rate": 2.1869706362958044e-08, | |
| "loss": 1.2329, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 4.957894736842105, | |
| "grad_norm": 0.11368842959943799, | |
| "learning_rate": 1.7280098500283005e-08, | |
| "loss": 1.2461, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 4.963157894736842, | |
| "grad_norm": 0.11074973929231093, | |
| "learning_rate": 1.3230298693871491e-08, | |
| "loss": 1.2364, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 4.968421052631579, | |
| "grad_norm": 0.11094251507004392, | |
| "learning_rate": 9.720361620217943e-09, | |
| "loss": 1.2314, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 4.973684210526316, | |
| "grad_norm": 0.11419040432886776, | |
| "learning_rate": 6.750334667091629e-09, | |
| "loss": 1.23, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 4.978947368421053, | |
| "grad_norm": 0.2986955255592173, | |
| "learning_rate": 4.320257932928229e-09, | |
| "loss": 1.2347, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 4.984210526315789, | |
| "grad_norm": 0.10907490264263059, | |
| "learning_rate": 2.4301642262791748e-09, | |
| "loss": 1.2327, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 4.989473684210527, | |
| "grad_norm": 0.11799915395997997, | |
| "learning_rate": 1.0800790653675564e-09, | |
| "loss": 1.2269, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 4.994736842105263, | |
| "grad_norm": 0.11484395305342408, | |
| "learning_rate": 2.700206777328518e-10, | |
| "loss": 1.2454, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 0.11735650459931829, | |
| "learning_rate": 0.0, | |
| "loss": 1.2331, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 950, | |
| "total_flos": 1.59373351452672e+16, | |
| "train_loss": 1.3301890049482648, | |
| "train_runtime": 16504.5508, | |
| "train_samples_per_second": 29.374, | |
| "train_steps_per_second": 0.058 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 950, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.59373351452672e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |