JacopoAbate commited on
Commit
d70658e
1 Parent(s): 1fc7e84

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -408
trainer_state.json DELETED
@@ -1,408 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 1.1998400213304894,
5
- "eval_steps": 500,
6
- "global_step": 4500,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.03,
13
- "grad_norm": 0.5563874321368991,
14
- "learning_rate": 4.000000000000001e-06,
15
- "loss": 1.58,
16
- "step": 100
17
- },
18
- {
19
- "epoch": 0.05,
20
- "grad_norm": 0.4957053999852371,
21
- "learning_rate": 8.000000000000001e-06,
22
- "loss": 1.4463,
23
- "step": 200
24
- },
25
- {
26
- "epoch": 0.08,
27
- "grad_norm": 0.589756030703148,
28
- "learning_rate": 1.2e-05,
29
- "loss": 1.3542,
30
- "step": 300
31
- },
32
- {
33
- "epoch": 0.11,
34
- "grad_norm": 0.6420302678887735,
35
- "learning_rate": 1.6000000000000003e-05,
36
- "loss": 1.3331,
37
- "step": 400
38
- },
39
- {
40
- "epoch": 0.13,
41
- "grad_norm": 0.7649283840590947,
42
- "learning_rate": 2e-05,
43
- "loss": 1.3253,
44
- "step": 500
45
- },
46
- {
47
- "epoch": 0.13,
48
- "eval_loss": 1.3249897956848145,
49
- "eval_runtime": 2000.6322,
50
- "eval_samples_per_second": 6.665,
51
- "eval_steps_per_second": 0.833,
52
- "step": 500
53
- },
54
- {
55
- "epoch": 0.16,
56
- "grad_norm": 0.7507878377667085,
57
- "learning_rate": 1.9714285714285718e-05,
58
- "loss": 1.3221,
59
- "step": 600
60
- },
61
- {
62
- "epoch": 0.19,
63
- "grad_norm": 0.6176603895840452,
64
- "learning_rate": 1.942857142857143e-05,
65
- "loss": 1.3126,
66
- "step": 700
67
- },
68
- {
69
- "epoch": 0.21,
70
- "grad_norm": 0.7327398805078954,
71
- "learning_rate": 1.9142857142857146e-05,
72
- "loss": 1.3055,
73
- "step": 800
74
- },
75
- {
76
- "epoch": 0.24,
77
- "grad_norm": 0.6839292302496646,
78
- "learning_rate": 1.885714285714286e-05,
79
- "loss": 1.3093,
80
- "step": 900
81
- },
82
- {
83
- "epoch": 0.27,
84
- "grad_norm": 0.607074650571682,
85
- "learning_rate": 1.8571428571428575e-05,
86
- "loss": 1.2985,
87
- "step": 1000
88
- },
89
- {
90
- "epoch": 0.27,
91
- "eval_loss": 1.2995541095733643,
92
- "eval_runtime": 1951.5754,
93
- "eval_samples_per_second": 6.833,
94
- "eval_steps_per_second": 0.854,
95
- "step": 1000
96
- },
97
- {
98
- "epoch": 0.29,
99
- "grad_norm": 0.74727636749315,
100
- "learning_rate": 1.8285714285714288e-05,
101
- "loss": 1.3017,
102
- "step": 1100
103
- },
104
- {
105
- "epoch": 0.32,
106
- "grad_norm": 0.6311499573020434,
107
- "learning_rate": 1.8e-05,
108
- "loss": 1.2896,
109
- "step": 1200
110
- },
111
- {
112
- "epoch": 0.35,
113
- "grad_norm": 0.5915205156004358,
114
- "learning_rate": 1.7714285714285717e-05,
115
- "loss": 1.2834,
116
- "step": 1300
117
- },
118
- {
119
- "epoch": 0.37,
120
- "grad_norm": 0.5946814358633083,
121
- "learning_rate": 1.742857142857143e-05,
122
- "loss": 1.3073,
123
- "step": 1400
124
- },
125
- {
126
- "epoch": 0.4,
127
- "grad_norm": 0.6097257778288048,
128
- "learning_rate": 1.7142857142857142e-05,
129
- "loss": 1.2843,
130
- "step": 1500
131
- },
132
- {
133
- "epoch": 0.4,
134
- "eval_loss": 1.2924432754516602,
135
- "eval_runtime": 1987.1893,
136
- "eval_samples_per_second": 6.71,
137
- "eval_steps_per_second": 0.839,
138
- "step": 1500
139
- },
140
- {
141
- "epoch": 0.43,
142
- "grad_norm": 0.596713525589904,
143
- "learning_rate": 1.6857142857142858e-05,
144
- "loss": 1.2748,
145
- "step": 1600
146
- },
147
- {
148
- "epoch": 0.45,
149
- "grad_norm": 0.6872784300870175,
150
- "learning_rate": 1.6571428571428574e-05,
151
- "loss": 1.2971,
152
- "step": 1700
153
- },
154
- {
155
- "epoch": 0.48,
156
- "grad_norm": 0.5519473130229932,
157
- "learning_rate": 1.6285714285714287e-05,
158
- "loss": 1.2746,
159
- "step": 1800
160
- },
161
- {
162
- "epoch": 0.51,
163
- "grad_norm": 0.6633345934082727,
164
- "learning_rate": 1.6000000000000003e-05,
165
- "loss": 1.2906,
166
- "step": 1900
167
- },
168
- {
169
- "epoch": 0.53,
170
- "grad_norm": 0.6476833370052498,
171
- "learning_rate": 1.5714285714285715e-05,
172
- "loss": 1.2813,
173
- "step": 2000
174
- },
175
- {
176
- "epoch": 0.53,
177
- "eval_loss": 1.2873681783676147,
178
- "eval_runtime": 1954.0241,
179
- "eval_samples_per_second": 6.824,
180
- "eval_steps_per_second": 0.853,
181
- "step": 2000
182
- },
183
- {
184
- "epoch": 0.56,
185
- "grad_norm": 0.6831512981144761,
186
- "learning_rate": 1.542857142857143e-05,
187
- "loss": 1.2973,
188
- "step": 2100
189
- },
190
- {
191
- "epoch": 0.59,
192
- "grad_norm": 0.6111369458671693,
193
- "learning_rate": 1.5142857142857144e-05,
194
- "loss": 1.2847,
195
- "step": 2200
196
- },
197
- {
198
- "epoch": 0.61,
199
- "grad_norm": 0.6468631061437224,
200
- "learning_rate": 1.4857142857142858e-05,
201
- "loss": 1.2754,
202
- "step": 2300
203
- },
204
- {
205
- "epoch": 0.64,
206
- "grad_norm": 0.6799634395265591,
207
- "learning_rate": 1.4571428571428573e-05,
208
- "loss": 1.2857,
209
- "step": 2400
210
- },
211
- {
212
- "epoch": 0.67,
213
- "grad_norm": 0.7108542425010391,
214
- "learning_rate": 1.4285714285714287e-05,
215
- "loss": 1.289,
216
- "step": 2500
217
- },
218
- {
219
- "epoch": 0.67,
220
- "eval_loss": 1.283585786819458,
221
- "eval_runtime": 1870.2159,
222
- "eval_samples_per_second": 7.13,
223
- "eval_steps_per_second": 0.891,
224
- "step": 2500
225
- },
226
- {
227
- "epoch": 0.69,
228
- "grad_norm": 0.7253629154301096,
229
- "learning_rate": 1.4e-05,
230
- "loss": 1.2793,
231
- "step": 2600
232
- },
233
- {
234
- "epoch": 0.72,
235
- "grad_norm": 0.614991663938332,
236
- "learning_rate": 1.3714285714285716e-05,
237
- "loss": 1.2767,
238
- "step": 2700
239
- },
240
- {
241
- "epoch": 0.75,
242
- "grad_norm": 0.5699407855446496,
243
- "learning_rate": 1.3428571428571429e-05,
244
- "loss": 1.2719,
245
- "step": 2800
246
- },
247
- {
248
- "epoch": 0.77,
249
- "grad_norm": 0.763568295530126,
250
- "learning_rate": 1.3142857142857145e-05,
251
- "loss": 1.2733,
252
- "step": 2900
253
- },
254
- {
255
- "epoch": 0.8,
256
- "grad_norm": 0.6538876064837678,
257
- "learning_rate": 1.2857142857142859e-05,
258
- "loss": 1.2822,
259
- "step": 3000
260
- },
261
- {
262
- "epoch": 0.8,
263
- "eval_loss": 1.2801251411437988,
264
- "eval_runtime": 1889.0286,
265
- "eval_samples_per_second": 7.059,
266
- "eval_steps_per_second": 0.882,
267
- "step": 3000
268
- },
269
- {
270
- "epoch": 0.83,
271
- "grad_norm": 0.6413250796291871,
272
- "learning_rate": 1.2571428571428572e-05,
273
- "loss": 1.2709,
274
- "step": 3100
275
- },
276
- {
277
- "epoch": 0.85,
278
- "grad_norm": 0.7125599551490435,
279
- "learning_rate": 1.2285714285714288e-05,
280
- "loss": 1.2829,
281
- "step": 3200
282
- },
283
- {
284
- "epoch": 0.88,
285
- "grad_norm": 0.6792031678140497,
286
- "learning_rate": 1.2e-05,
287
- "loss": 1.2728,
288
- "step": 3300
289
- },
290
- {
291
- "epoch": 0.91,
292
- "grad_norm": 0.6989586202409576,
293
- "learning_rate": 1.1714285714285716e-05,
294
- "loss": 1.2835,
295
- "step": 3400
296
- },
297
- {
298
- "epoch": 0.93,
299
- "grad_norm": 0.7569471708203911,
300
- "learning_rate": 1.1428571428571429e-05,
301
- "loss": 1.2793,
302
- "step": 3500
303
- },
304
- {
305
- "epoch": 0.93,
306
- "eval_loss": 1.2773550748825073,
307
- "eval_runtime": 1896.8518,
308
- "eval_samples_per_second": 7.03,
309
- "eval_steps_per_second": 0.879,
310
- "step": 3500
311
- },
312
- {
313
- "epoch": 0.96,
314
- "grad_norm": 0.6242374298012554,
315
- "learning_rate": 1.1142857142857143e-05,
316
- "loss": 1.2805,
317
- "step": 3600
318
- },
319
- {
320
- "epoch": 0.99,
321
- "grad_norm": 0.6585613373985719,
322
- "learning_rate": 1.0857142857142858e-05,
323
- "loss": 1.265,
324
- "step": 3700
325
- },
326
- {
327
- "epoch": 1.01,
328
- "grad_norm": 0.7452400076297672,
329
- "learning_rate": 1.0571428571428572e-05,
330
- "loss": 1.2589,
331
- "step": 3800
332
- },
333
- {
334
- "epoch": 1.04,
335
- "grad_norm": 0.7861020289489161,
336
- "learning_rate": 1.0285714285714285e-05,
337
- "loss": 1.2616,
338
- "step": 3900
339
- },
340
- {
341
- "epoch": 1.07,
342
- "grad_norm": 0.7757094950303476,
343
- "learning_rate": 1e-05,
344
- "loss": 1.2721,
345
- "step": 4000
346
- },
347
- {
348
- "epoch": 1.07,
349
- "eval_loss": 1.2751243114471436,
350
- "eval_runtime": 1895.5553,
351
- "eval_samples_per_second": 7.035,
352
- "eval_steps_per_second": 0.879,
353
- "step": 4000
354
- },
355
- {
356
- "epoch": 1.09,
357
- "grad_norm": 0.6999523427612176,
358
- "learning_rate": 9.714285714285715e-06,
359
- "loss": 1.2649,
360
- "step": 4100
361
- },
362
- {
363
- "epoch": 1.12,
364
- "grad_norm": 0.6528269639884913,
365
- "learning_rate": 9.42857142857143e-06,
366
- "loss": 1.2675,
367
- "step": 4200
368
- },
369
- {
370
- "epoch": 1.15,
371
- "grad_norm": 0.8019204201326198,
372
- "learning_rate": 9.142857142857144e-06,
373
- "loss": 1.2775,
374
- "step": 4300
375
- },
376
- {
377
- "epoch": 1.17,
378
- "grad_norm": 0.8453041527524593,
379
- "learning_rate": 8.857142857142858e-06,
380
- "loss": 1.2682,
381
- "step": 4400
382
- },
383
- {
384
- "epoch": 1.2,
385
- "grad_norm": 0.6858153075780669,
386
- "learning_rate": 8.571428571428571e-06,
387
- "loss": 1.2753,
388
- "step": 4500
389
- },
390
- {
391
- "epoch": 1.2,
392
- "eval_loss": 1.2731624841690063,
393
- "eval_runtime": 2031.5498,
394
- "eval_samples_per_second": 6.564,
395
- "eval_steps_per_second": 0.821,
396
- "step": 4500
397
- }
398
- ],
399
- "logging_steps": 100,
400
- "max_steps": 7500,
401
- "num_input_tokens_seen": 0,
402
- "num_train_epochs": 2,
403
- "save_steps": 500,
404
- "total_flos": 6.185622662887244e+18,
405
- "train_batch_size": 8,
406
- "trial_name": null,
407
- "trial_params": null
408
- }