hongyun1021 commited on
Commit
7626446
·
verified ·
1 Parent(s): 3b664a4

Delete trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -406
trainer_state.json DELETED
@@ -1,406 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
- "eval_steps": 500,
6
- "global_step": 26178,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.019100007640003056,
13
- "grad_norm": 0.25021880865097046,
14
- "learning_rate": 1.2722646310432571e-05,
15
- "loss": 1.8349,
16
- "step": 500
17
- },
18
- {
19
- "epoch": 0.03820001528000611,
20
- "grad_norm": 0.23228777945041656,
21
- "learning_rate": 1.999649508002506e-05,
22
- "loss": 1.0711,
23
- "step": 1000
24
- },
25
- {
26
- "epoch": 0.057300022920009165,
27
- "grad_norm": 0.21457377076148987,
28
- "learning_rate": 1.9961006707312773e-05,
29
- "loss": 1.0713,
30
- "step": 1500
31
- },
32
- {
33
- "epoch": 0.07640003056001222,
34
- "grad_norm": 0.21082378923892975,
35
- "learning_rate": 1.9887410816586273e-05,
36
- "loss": 1.0714,
37
- "step": 2000
38
- },
39
- {
40
- "epoch": 0.09550003820001528,
41
- "grad_norm": 0.22083863615989685,
42
- "learning_rate": 1.977598896138871e-05,
43
- "loss": 1.0713,
44
- "step": 2500
45
- },
46
- {
47
- "epoch": 0.11460004584001833,
48
- "grad_norm": 0.2152423858642578,
49
- "learning_rate": 1.962716740489596e-05,
50
- "loss": 1.0711,
51
- "step": 3000
52
- },
53
- {
54
- "epoch": 0.1337000534800214,
55
- "grad_norm": 0.22183740139007568,
56
- "learning_rate": 1.9441515489174708e-05,
57
- "loss": 1.0711,
58
- "step": 3500
59
- },
60
- {
61
- "epoch": 0.15280006112002445,
62
- "grad_norm": 0.22121019661426544,
63
- "learning_rate": 1.9219743457067936e-05,
64
- "loss": 1.0719,
65
- "step": 4000
66
- },
67
- {
68
- "epoch": 0.1719000687600275,
69
- "grad_norm": 0.2431032508611679,
70
- "learning_rate": 1.8962699735040644e-05,
71
- "loss": 1.0711,
72
- "step": 4500
73
- },
74
- {
75
- "epoch": 0.19100007640003056,
76
- "grad_norm": 0.2221996784210205,
77
- "learning_rate": 1.8671367687380737e-05,
78
- "loss": 1.0717,
79
- "step": 5000
80
- },
81
- {
82
- "epoch": 0.2101000840400336,
83
- "grad_norm": 0.2070787101984024,
84
- "learning_rate": 1.834686185417242e-05,
85
- "loss": 1.0718,
86
- "step": 5500
87
- },
88
- {
89
- "epoch": 0.22920009168003666,
90
- "grad_norm": 0.2302798330783844,
91
- "learning_rate": 1.7990423687434358e-05,
92
- "loss": 1.0694,
93
- "step": 6000
94
- },
95
- {
96
- "epoch": 0.24830009932003974,
97
- "grad_norm": 0.2083750069141388,
98
- "learning_rate": 1.7603416801734742e-05,
99
- "loss": 1.0711,
100
- "step": 6500
101
- },
102
- {
103
- "epoch": 0.2674001069600428,
104
- "grad_norm": 0.22757408022880554,
105
- "learning_rate": 1.7187321757452828e-05,
106
- "loss": 1.0694,
107
- "step": 7000
108
- },
109
- {
110
- "epoch": 0.2865001146000458,
111
- "grad_norm": 0.22578437626361847,
112
- "learning_rate": 1.6743730396644495e-05,
113
- "loss": 1.0687,
114
- "step": 7500
115
- },
116
- {
117
- "epoch": 0.3056001222400489,
118
- "grad_norm": 0.21672435104846954,
119
- "learning_rate": 1.6274339753180888e-05,
120
- "loss": 1.0681,
121
- "step": 8000
122
- },
123
- {
124
- "epoch": 0.324700129880052,
125
- "grad_norm": 0.21607784926891327,
126
- "learning_rate": 1.578094556045794e-05,
127
- "loss": 1.067,
128
- "step": 8500
129
- },
130
- {
131
- "epoch": 0.343800137520055,
132
- "grad_norm": 0.22438710927963257,
133
- "learning_rate": 1.5265435381514187e-05,
134
- "loss": 1.0682,
135
- "step": 9000
136
- },
137
- {
138
- "epoch": 0.3629001451600581,
139
- "grad_norm": 0.20828410983085632,
140
- "learning_rate": 1.4729781387838703e-05,
141
- "loss": 1.0677,
142
- "step": 9500
143
- },
144
- {
145
- "epoch": 0.3820001528000611,
146
- "grad_norm": 0.23032569885253906,
147
- "learning_rate": 1.4176032814495078e-05,
148
- "loss": 1.0665,
149
- "step": 10000
150
- },
151
- {
152
- "epoch": 0.4011001604400642,
153
- "grad_norm": 0.21034111082553864,
154
- "learning_rate": 1.3606308120425677e-05,
155
- "loss": 1.0651,
156
- "step": 10500
157
- },
158
- {
159
- "epoch": 0.4202001680800672,
160
- "grad_norm": 0.22789451479911804,
161
- "learning_rate": 1.3022786883928191e-05,
162
- "loss": 1.0659,
163
- "step": 11000
164
- },
165
- {
166
- "epoch": 0.4393001757200703,
167
- "grad_norm": 0.21325276792049408,
168
- "learning_rate": 1.2427701464309752e-05,
169
- "loss": 1.0658,
170
- "step": 11500
171
- },
172
- {
173
- "epoch": 0.4584001833600733,
174
- "grad_norm": 0.2313033789396286,
175
- "learning_rate": 1.1823328461618373e-05,
176
- "loss": 1.063,
177
- "step": 12000
178
- },
179
- {
180
- "epoch": 0.4775001910000764,
181
- "grad_norm": 0.23162369430065155,
182
- "learning_rate": 1.1211980007123922e-05,
183
- "loss": 1.0635,
184
- "step": 12500
185
- },
186
- {
187
- "epoch": 0.4966001986400795,
188
- "grad_norm": 0.2397702932357788,
189
- "learning_rate": 1.0595994917868404e-05,
190
- "loss": 1.0631,
191
- "step": 13000
192
- },
193
- {
194
- "epoch": 0.5157002062800825,
195
- "grad_norm": 0.22418206930160522,
196
- "learning_rate": 9.9777297491253e-06,
197
- "loss": 1.0622,
198
- "step": 13500
199
- },
200
- {
201
- "epoch": 0.5348002139200856,
202
- "grad_norm": 0.2249964326620102,
203
- "learning_rate": 9.359549778998295e-06,
204
- "loss": 1.0618,
205
- "step": 14000
206
- },
207
- {
208
- "epoch": 0.5539002215600887,
209
- "grad_norm": 0.22192934155464172,
210
- "learning_rate": 8.743819959649335e-06,
211
- "loss": 1.0624,
212
- "step": 14500
213
- },
214
- {
215
- "epoch": 0.5730002292000916,
216
- "grad_norm": 0.23028317093849182,
217
- "learning_rate": 8.132895869773638e-06,
218
- "loss": 1.0606,
219
- "step": 15000
220
- },
221
- {
222
- "epoch": 0.5921002368400947,
223
- "grad_norm": 0.22582708299160004,
224
- "learning_rate": 7.529114702934506e-06,
225
- "loss": 1.0599,
226
- "step": 15500
227
- },
228
- {
229
- "epoch": 0.6112002444800978,
230
- "grad_norm": 0.22541284561157227,
231
- "learning_rate": 6.9347863262335226e-06,
232
- "loss": 1.0599,
233
- "step": 16000
234
- },
235
- {
236
- "epoch": 0.6303002521201009,
237
- "grad_norm": 0.22899463772773743,
238
- "learning_rate": 6.352184443522822e-06,
239
- "loss": 1.0581,
240
- "step": 16500
241
- },
242
- {
243
- "epoch": 0.649400259760104,
244
- "grad_norm": 0.22109055519104004,
245
- "learning_rate": 5.783537896965932e-06,
246
- "loss": 1.058,
247
- "step": 17000
248
- },
249
- {
250
- "epoch": 0.6685002674001069,
251
- "grad_norm": 0.21583594381809235,
252
- "learning_rate": 5.231022140224662e-06,
253
- "loss": 1.0562,
254
- "step": 17500
255
- },
256
- {
257
- "epoch": 0.68760027504011,
258
- "grad_norm": 0.2213190644979477,
259
- "learning_rate": 4.696750915892681e-06,
260
- "loss": 1.0574,
261
- "step": 18000
262
- },
263
- {
264
- "epoch": 0.7067002826801131,
265
- "grad_norm": 0.22758503258228302,
266
- "learning_rate": 4.182768169015299e-06,
267
- "loss": 1.0564,
268
- "step": 18500
269
- },
270
- {
271
- "epoch": 0.7258002903201162,
272
- "grad_norm": 0.22701376676559448,
273
- "learning_rate": 3.691040227631554e-06,
274
- "loss": 1.054,
275
- "step": 19000
276
- },
277
- {
278
- "epoch": 0.7449002979601191,
279
- "grad_norm": 0.23496034741401672,
280
- "learning_rate": 3.2234482802533075e-06,
281
- "loss": 1.0536,
282
- "step": 19500
283
- },
284
- {
285
- "epoch": 0.7640003056001222,
286
- "grad_norm": 0.2208418995141983,
287
- "learning_rate": 2.78178117906002e-06,
288
- "loss": 1.0538,
289
- "step": 20000
290
- },
291
- {
292
- "epoch": 0.7831003132401253,
293
- "grad_norm": 0.2349170446395874,
294
- "learning_rate": 2.3677285963418093e-06,
295
- "loss": 1.0531,
296
- "step": 20500
297
- },
298
- {
299
- "epoch": 0.8022003208801284,
300
- "grad_norm": 0.21593229472637177,
301
- "learning_rate": 1.9828745603719603e-06,
302
- "loss": 1.0526,
303
- "step": 21000
304
- },
305
- {
306
- "epoch": 0.8213003285201315,
307
- "grad_norm": 0.21944259107112885,
308
- "learning_rate": 1.6286913954385164e-06,
309
- "loss": 1.0523,
310
- "step": 21500
311
- },
312
- {
313
- "epoch": 0.8404003361601344,
314
- "grad_norm": 0.2327922135591507,
315
- "learning_rate": 1.3065340892183886e-06,
316
- "loss": 1.051,
317
- "step": 22000
318
- },
319
- {
320
- "epoch": 0.8595003438001375,
321
- "grad_norm": 0.21020913124084473,
322
- "learning_rate": 1.017635109042534e-06,
323
- "loss": 1.0501,
324
- "step": 22500
325
- },
326
- {
327
- "epoch": 0.8786003514401406,
328
- "grad_norm": 0.21785978972911835,
329
- "learning_rate": 7.630996868834629e-07,
330
- "loss": 1.0514,
331
- "step": 23000
332
- },
333
- {
334
- "epoch": 0.8977003590801437,
335
- "grad_norm": 0.2287386804819107,
336
- "learning_rate": 5.439015911031709e-07,
337
- "loss": 1.0509,
338
- "step": 23500
339
- },
340
- {
341
- "epoch": 0.9168003667201466,
342
- "grad_norm": 0.21757139265537262,
343
- "learning_rate": 3.608794011373562e-07,
344
- "loss": 1.0502,
345
- "step": 24000
346
- },
347
- {
348
- "epoch": 0.9359003743601497,
349
- "grad_norm": 0.23382288217544556,
350
- "learning_rate": 2.147332993677864e-07,
351
- "loss": 1.0494,
352
- "step": 24500
353
- },
354
- {
355
- "epoch": 0.9550003820001528,
356
- "grad_norm": 0.2304593026638031,
357
- "learning_rate": 1.0602239245601997e-07,
358
- "loss": 1.0494,
359
- "step": 25000
360
- },
361
- {
362
- "epoch": 0.9741003896401559,
363
- "grad_norm": 0.2088557332754135,
364
- "learning_rate": 3.5162572386192587e-08,
365
- "loss": 1.0492,
366
- "step": 25500
367
- },
368
- {
369
- "epoch": 0.993200397280159,
370
- "grad_norm": 0.22690053284168243,
371
- "learning_rate": 2.424925399798772e-09,
372
- "loss": 1.0476,
373
- "step": 26000
374
- },
375
- {
376
- "epoch": 1.0,
377
- "step": 26178,
378
- "total_flos": 5.395358533078221e+18,
379
- "train_loss": 1.0762467793451473,
380
- "train_runtime": 9311.7449,
381
- "train_samples_per_second": 89.96,
382
- "train_steps_per_second": 2.811
383
- }
384
- ],
385
- "logging_steps": 500,
386
- "max_steps": 26178,
387
- "num_input_tokens_seen": 0,
388
- "num_train_epochs": 1,
389
- "save_steps": 20000,
390
- "stateful_callbacks": {
391
- "TrainerControl": {
392
- "args": {
393
- "should_epoch_stop": false,
394
- "should_evaluate": false,
395
- "should_log": false,
396
- "should_save": true,
397
- "should_training_stop": true
398
- },
399
- "attributes": {}
400
- }
401
- },
402
- "total_flos": 5.395358533078221e+18,
403
- "train_batch_size": 4,
404
- "trial_name": null,
405
- "trial_params": null
406
- }