josesantorcuato commited on
Commit
c5c6577
1 Parent(s): c72fe97

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.080434782608696,
3
- "eval_accuracy": 0.9617834394904459,
4
- "eval_loss": 0.1620456427335739,
5
- "eval_runtime": 14.8873,
6
- "eval_samples_per_second": 10.546,
7
- "eval_steps_per_second": 1.343
8
  }
 
1
  {
2
+ "epoch": 9.04375,
3
+ "eval_accuracy": 0.5714285714285714,
4
+ "eval_loss": 1.1454538106918335,
5
+ "eval_runtime": 3.2395,
6
+ "eval_samples_per_second": 8.643,
7
+ "eval_steps_per_second": 1.235
8
  }
runs/Nov05_02-16-41_93cbaac1b31b/events.out.tfevents.1730773411.93cbaac1b31b.4486.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fd74d8cf9078fd862acd198f7d95b0b259e9557fdbe0656c64d6e88442b569d
3
- size 411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a74e0e82e3403cac0a885363d257c8b1dbc2369cdb37c6f5b6b048135071570
3
+ size 734
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.080434782608696,
3
- "eval_accuracy": 0.9617834394904459,
4
- "eval_loss": 0.1620456427335739,
5
- "eval_runtime": 14.8873,
6
- "eval_samples_per_second": 10.546,
7
- "eval_steps_per_second": 1.343
8
  }
 
1
  {
2
+ "epoch": 9.04375,
3
+ "eval_accuracy": 0.5714285714285714,
4
+ "eval_loss": 1.1454538106918335,
5
+ "eval_runtime": 3.2395,
6
+ "eval_samples_per_second": 8.643,
7
+ "eval_steps_per_second": 1.235
8
  }
trainer_state.json CHANGED
@@ -1,455 +1,245 @@
1
  {
2
- "best_metric": 0.9848484848484849,
3
- "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-329",
4
- "epoch": 9.080434782608696,
5
  "eval_steps": 500,
6
- "global_step": 460,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.021739130434782608,
13
- "grad_norm": 9.379189491271973,
14
- "learning_rate": 1.0869565217391305e-05,
15
- "loss": 2.5308,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.043478260869565216,
20
- "grad_norm": 10.402758598327637,
21
- "learning_rate": 2.173913043478261e-05,
22
- "loss": 2.4704,
23
- "step": 20
24
- },
25
- {
26
- "epoch": 0.06521739130434782,
27
- "grad_norm": 8.703824043273926,
28
- "learning_rate": 3.260869565217392e-05,
29
- "loss": 2.4447,
30
- "step": 30
31
- },
32
- {
33
- "epoch": 0.08695652173913043,
34
- "grad_norm": 10.318586349487305,
35
- "learning_rate": 4.347826086956522e-05,
36
- "loss": 2.2403,
37
- "step": 40
38
- },
39
- {
40
- "epoch": 0.10217391304347827,
41
- "eval_accuracy": 0.13636363636363635,
42
- "eval_loss": 2.2386703491210938,
43
- "eval_runtime": 6.4992,
44
- "eval_samples_per_second": 10.155,
45
- "eval_steps_per_second": 1.385,
46
- "step": 47
47
- },
48
- {
49
- "epoch": 1.0065217391304349,
50
- "grad_norm": 8.518580436706543,
51
- "learning_rate": 4.9516908212560386e-05,
52
- "loss": 2.215,
53
- "step": 50
54
- },
55
- {
56
- "epoch": 1.0282608695652173,
57
- "grad_norm": 11.493647575378418,
58
- "learning_rate": 4.830917874396135e-05,
59
- "loss": 1.8724,
60
- "step": 60
61
- },
62
- {
63
- "epoch": 1.05,
64
- "grad_norm": 9.734604835510254,
65
- "learning_rate": 4.710144927536232e-05,
66
- "loss": 1.7272,
67
- "step": 70
68
- },
69
- {
70
- "epoch": 1.0717391304347825,
71
- "grad_norm": 9.319704055786133,
72
- "learning_rate": 4.589371980676328e-05,
73
- "loss": 1.5902,
74
- "step": 80
75
- },
76
- {
77
- "epoch": 1.0934782608695652,
78
- "grad_norm": 7.253905773162842,
79
- "learning_rate": 4.4685990338164255e-05,
80
- "loss": 1.2918,
81
- "step": 90
82
  },
83
  {
84
- "epoch": 1.1021739130434782,
85
- "eval_accuracy": 0.5151515151515151,
86
- "eval_loss": 1.3612327575683594,
87
- "eval_runtime": 6.203,
88
- "eval_samples_per_second": 10.64,
89
- "eval_steps_per_second": 1.451,
90
- "step": 94
91
- },
92
- {
93
- "epoch": 2.0130434782608697,
94
- "grad_norm": 9.277091026306152,
95
- "learning_rate": 4.347826086956522e-05,
96
- "loss": 1.1276,
97
- "step": 100
98
  },
99
  {
100
- "epoch": 2.034782608695652,
101
- "grad_norm": 6.776515960693359,
102
- "learning_rate": 4.2270531400966186e-05,
103
- "loss": 0.8852,
104
- "step": 110
105
  },
106
  {
107
- "epoch": 2.0565217391304347,
108
- "grad_norm": 7.559288501739502,
109
- "learning_rate": 4.106280193236715e-05,
110
- "loss": 0.8805,
111
- "step": 120
 
 
112
  },
113
  {
114
- "epoch": 2.0782608695652174,
115
- "grad_norm": 14.196601867675781,
116
- "learning_rate": 3.985507246376812e-05,
117
- "loss": 0.7797,
118
- "step": 130
119
  },
120
  {
121
  "epoch": 2.1,
122
- "grad_norm": 14.56787109375,
123
- "learning_rate": 3.864734299516908e-05,
124
- "loss": 0.8662,
125
- "step": 140
126
- },
127
- {
128
- "epoch": 2.1021739130434782,
129
- "eval_accuracy": 0.7424242424242424,
130
- "eval_loss": 0.8151518702507019,
131
- "eval_runtime": 6.4913,
132
- "eval_samples_per_second": 10.167,
133
- "eval_steps_per_second": 1.386,
134
- "step": 141
135
- },
136
- {
137
- "epoch": 3.0195652173913046,
138
- "grad_norm": 6.646560192108154,
139
- "learning_rate": 3.743961352657005e-05,
140
- "loss": 0.6663,
141
- "step": 150
142
- },
143
- {
144
- "epoch": 3.041304347826087,
145
- "grad_norm": 6.8809356689453125,
146
- "learning_rate": 3.6231884057971014e-05,
147
- "loss": 0.549,
148
- "step": 160
149
- },
150
- {
151
- "epoch": 3.0630434782608695,
152
- "grad_norm": 12.483465194702148,
153
- "learning_rate": 3.502415458937198e-05,
154
- "loss": 0.5251,
155
- "step": 170
156
- },
157
- {
158
- "epoch": 3.0847826086956522,
159
- "grad_norm": 7.2777276039123535,
160
- "learning_rate": 3.381642512077295e-05,
161
- "loss": 0.6072,
162
- "step": 180
163
- },
164
- {
165
- "epoch": 3.1021739130434782,
166
- "eval_accuracy": 0.8939393939393939,
167
- "eval_loss": 0.39679834246635437,
168
- "eval_runtime": 5.8658,
169
- "eval_samples_per_second": 11.252,
170
- "eval_steps_per_second": 1.534,
171
- "step": 188
172
  },
173
  {
174
- "epoch": 4.004347826086956,
175
- "grad_norm": 6.928377628326416,
176
- "learning_rate": 3.260869565217392e-05,
177
- "loss": 0.4876,
178
- "step": 190
 
 
179
  },
180
  {
181
- "epoch": 4.026086956521739,
182
- "grad_norm": 19.643159866333008,
183
- "learning_rate": 3.140096618357488e-05,
184
- "loss": 0.3014,
185
- "step": 200
186
  },
187
  {
188
- "epoch": 4.047826086956522,
189
- "grad_norm": 4.118589401245117,
190
- "learning_rate": 3.0193236714975848e-05,
191
- "loss": 0.2793,
192
- "step": 210
 
 
193
  },
194
  {
195
- "epoch": 4.069565217391304,
196
- "grad_norm": 1.8621646165847778,
197
- "learning_rate": 2.8985507246376814e-05,
198
- "loss": 0.2754,
199
- "step": 220
200
  },
201
  {
202
- "epoch": 4.091304347826087,
203
- "grad_norm": 18.98653793334961,
204
  "learning_rate": 2.777777777777778e-05,
205
- "loss": 0.2958,
206
- "step": 230
207
- },
208
- {
209
- "epoch": 4.102173913043479,
210
- "eval_accuracy": 0.8787878787878788,
211
- "eval_loss": 0.3365328013896942,
212
- "eval_runtime": 5.9112,
213
- "eval_samples_per_second": 11.165,
214
- "eval_steps_per_second": 1.523,
215
- "step": 235
216
- },
217
- {
218
- "epoch": 5.010869565217392,
219
- "grad_norm": 11.831360816955566,
220
- "learning_rate": 2.6570048309178748e-05,
221
- "loss": 0.2189,
222
- "step": 240
223
- },
224
- {
225
- "epoch": 5.032608695652174,
226
- "grad_norm": 5.401520252227783,
227
- "learning_rate": 2.5362318840579714e-05,
228
- "loss": 0.1791,
229
- "step": 250
230
- },
231
- {
232
- "epoch": 5.054347826086956,
233
- "grad_norm": 9.114124298095703,
234
- "learning_rate": 2.4154589371980676e-05,
235
- "loss": 0.2604,
236
- "step": 260
237
- },
238
- {
239
- "epoch": 5.076086956521739,
240
- "grad_norm": 1.7160027027130127,
241
- "learning_rate": 2.294685990338164e-05,
242
- "loss": 0.1396,
243
- "step": 270
244
- },
245
- {
246
- "epoch": 5.0978260869565215,
247
- "grad_norm": 1.6239838600158691,
248
- "learning_rate": 2.173913043478261e-05,
249
- "loss": 0.1534,
250
- "step": 280
251
- },
252
- {
253
- "epoch": 5.102173913043479,
254
- "eval_accuracy": 0.9242424242424242,
255
- "eval_loss": 0.25064730644226074,
256
- "eval_runtime": 5.9732,
257
- "eval_samples_per_second": 11.049,
258
- "eval_steps_per_second": 1.507,
259
- "step": 282
260
- },
261
- {
262
- "epoch": 6.017391304347826,
263
- "grad_norm": 11.243247032165527,
264
- "learning_rate": 2.0531400966183576e-05,
265
- "loss": 0.1272,
266
- "step": 290
267
- },
268
- {
269
- "epoch": 6.039130434782609,
270
- "grad_norm": 11.26307487487793,
271
- "learning_rate": 1.932367149758454e-05,
272
- "loss": 0.0651,
273
- "step": 300
274
- },
275
- {
276
- "epoch": 6.060869565217391,
277
- "grad_norm": 18.92414665222168,
278
- "learning_rate": 1.8115942028985507e-05,
279
- "loss": 0.167,
280
- "step": 310
281
- },
282
- {
283
- "epoch": 6.082608695652174,
284
- "grad_norm": 0.3199400305747986,
285
- "learning_rate": 1.6908212560386476e-05,
286
- "loss": 0.0907,
287
- "step": 320
288
- },
289
- {
290
- "epoch": 6.102173913043479,
291
- "eval_accuracy": 0.9848484848484849,
292
- "eval_loss": 0.1101275309920311,
293
- "eval_runtime": 6.2213,
294
- "eval_samples_per_second": 10.609,
295
- "eval_steps_per_second": 1.447,
296
- "step": 329
297
- },
298
- {
299
- "epoch": 7.002173913043478,
300
- "grad_norm": 5.541162490844727,
301
- "learning_rate": 1.570048309178744e-05,
302
- "loss": 0.1261,
303
- "step": 330
304
- },
305
- {
306
- "epoch": 7.023913043478261,
307
- "grad_norm": 9.781050682067871,
308
- "learning_rate": 1.4492753623188407e-05,
309
- "loss": 0.0858,
310
- "step": 340
311
- },
312
- {
313
- "epoch": 7.0456521739130435,
314
- "grad_norm": 15.221212387084961,
315
- "learning_rate": 1.3285024154589374e-05,
316
- "loss": 0.0677,
317
- "step": 350
318
  },
319
  {
320
- "epoch": 7.067391304347826,
321
- "grad_norm": 0.271314412355423,
322
- "learning_rate": 1.2077294685990338e-05,
323
- "loss": 0.0875,
324
- "step": 360
 
 
325
  },
326
  {
327
- "epoch": 7.089130434782609,
328
- "grad_norm": 7.625803470611572,
329
- "learning_rate": 1.0869565217391305e-05,
330
- "loss": 0.1085,
331
- "step": 370
332
  },
333
  {
334
- "epoch": 7.102173913043479,
335
- "eval_accuracy": 0.9545454545454546,
336
- "eval_loss": 0.10326449573040009,
337
- "eval_runtime": 6.6343,
338
- "eval_samples_per_second": 9.948,
339
- "eval_steps_per_second": 1.357,
340
- "step": 376
341
  },
342
  {
343
- "epoch": 8.008695652173913,
344
- "grad_norm": 0.3747415244579315,
345
- "learning_rate": 9.66183574879227e-06,
346
- "loss": 0.1309,
347
- "step": 380
 
 
348
  },
349
  {
350
- "epoch": 8.030434782608696,
351
- "grad_norm": 0.6200582981109619,
352
- "learning_rate": 8.454106280193238e-06,
353
- "loss": 0.0571,
354
- "step": 390
355
  },
356
  {
357
- "epoch": 8.052173913043479,
358
- "grad_norm": 0.3507235646247864,
359
- "learning_rate": 7.246376811594203e-06,
360
- "loss": 0.0293,
361
- "step": 400
 
 
362
  },
363
  {
364
- "epoch": 8.07391304347826,
365
- "grad_norm": 0.3272978961467743,
366
- "learning_rate": 6.038647342995169e-06,
367
- "loss": 0.0481,
368
- "step": 410
369
  },
370
  {
371
- "epoch": 8.095652173913043,
372
- "grad_norm": 0.4806969165802002,
373
- "learning_rate": 4.830917874396135e-06,
374
- "loss": 0.0666,
375
- "step": 420
376
  },
377
  {
378
- "epoch": 8.102173913043478,
379
- "eval_accuracy": 0.9696969696969697,
380
- "eval_loss": 0.10309642553329468,
381
- "eval_runtime": 6.078,
382
- "eval_samples_per_second": 10.859,
383
- "eval_steps_per_second": 1.481,
384
- "step": 423
385
  },
386
  {
387
- "epoch": 9.015217391304347,
388
- "grad_norm": 0.6005312204360962,
389
- "learning_rate": 3.6231884057971017e-06,
390
- "loss": 0.0985,
391
- "step": 430
392
  },
393
  {
394
- "epoch": 9.03695652173913,
395
- "grad_norm": 1.0368554592132568,
396
- "learning_rate": 2.4154589371980677e-06,
397
- "loss": 0.0249,
398
- "step": 440
399
  },
400
  {
401
- "epoch": 9.058695652173913,
402
- "grad_norm": 0.20510777831077576,
403
- "learning_rate": 1.2077294685990338e-06,
404
- "loss": 0.0141,
405
- "step": 450
 
 
406
  },
407
  {
408
- "epoch": 9.080434782608696,
409
- "grad_norm": 0.4739467203617096,
410
  "learning_rate": 0.0,
411
- "loss": 0.0251,
412
- "step": 460
413
  },
414
  {
415
- "epoch": 9.080434782608696,
416
- "eval_accuracy": 0.9696969696969697,
417
- "eval_loss": 0.11189308762550354,
418
- "eval_runtime": 8.098,
419
- "eval_samples_per_second": 8.15,
420
- "eval_steps_per_second": 1.111,
421
- "step": 460
422
  },
423
  {
424
- "epoch": 9.080434782608696,
425
- "step": 460,
426
- "total_flos": 4.5186331435416945e+18,
427
- "train_loss": 0.6365434888264407,
428
- "train_runtime": 731.5291,
429
- "train_samples_per_second": 5.031,
430
- "train_steps_per_second": 0.629
431
  },
432
  {
433
- "epoch": 9.080434782608696,
434
- "eval_accuracy": 0.9617834394904459,
435
- "eval_loss": 0.1620456427335739,
436
- "eval_runtime": 14.6193,
437
- "eval_samples_per_second": 10.739,
438
- "eval_steps_per_second": 1.368,
439
- "step": 460
440
  },
441
  {
442
- "epoch": 9.080434782608696,
443
- "eval_accuracy": 0.9617834394904459,
444
- "eval_loss": 0.1620456427335739,
445
- "eval_runtime": 14.8873,
446
- "eval_samples_per_second": 10.546,
447
- "eval_steps_per_second": 1.343,
448
- "step": 460
449
  }
450
  ],
451
  "logging_steps": 10,
452
- "max_steps": 460,
453
  "num_input_tokens_seen": 0,
454
  "num_train_epochs": 9223372036854775807,
455
  "save_steps": 500,
@@ -465,7 +255,7 @@
465
  "attributes": {}
466
  }
467
  },
468
- "total_flos": 4.5186331435416945e+18,
469
  "train_batch_size": 8,
470
  "trial_name": null,
471
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6428571428571429,
3
+ "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-153",
4
+ "epoch": 9.04375,
5
  "eval_steps": 500,
6
+ "global_step": 160,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.0625,
13
+ "grad_norm": 8.446556091308594,
14
+ "learning_rate": 3.125e-05,
15
+ "loss": 1.4262,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.10625,
20
+ "eval_accuracy": 0.35714285714285715,
21
+ "eval_loss": 1.4080798625946045,
22
+ "eval_runtime": 4.4913,
23
+ "eval_samples_per_second": 3.117,
24
+ "eval_steps_per_second": 0.445,
25
+ "step": 17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  },
27
  {
28
+ "epoch": 1.01875,
29
+ "grad_norm": 9.888665199279785,
30
+ "learning_rate": 4.8611111111111115e-05,
31
+ "loss": 1.3656,
32
+ "step": 20
 
 
 
 
 
 
 
 
 
33
  },
34
  {
35
+ "epoch": 1.08125,
36
+ "grad_norm": 5.500328540802002,
37
+ "learning_rate": 4.5138888888888894e-05,
38
+ "loss": 1.3918,
39
+ "step": 30
40
  },
41
  {
42
+ "epoch": 1.10625,
43
+ "eval_accuracy": 0.21428571428571427,
44
+ "eval_loss": 1.579779863357544,
45
+ "eval_runtime": 1.2328,
46
+ "eval_samples_per_second": 11.357,
47
+ "eval_steps_per_second": 1.622,
48
+ "step": 34
49
  },
50
  {
51
+ "epoch": 2.0375,
52
+ "grad_norm": 6.509393692016602,
53
+ "learning_rate": 4.166666666666667e-05,
54
+ "loss": 1.3092,
55
+ "step": 40
56
  },
57
  {
58
  "epoch": 2.1,
59
+ "grad_norm": 9.822772979736328,
60
+ "learning_rate": 3.8194444444444444e-05,
61
+ "loss": 1.2887,
62
+ "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  },
64
  {
65
+ "epoch": 2.10625,
66
+ "eval_accuracy": 0.5,
67
+ "eval_loss": 1.3649126291275024,
68
+ "eval_runtime": 1.2619,
69
+ "eval_samples_per_second": 11.095,
70
+ "eval_steps_per_second": 1.585,
71
+ "step": 51
72
  },
73
  {
74
+ "epoch": 3.05625,
75
+ "grad_norm": 7.699404239654541,
76
+ "learning_rate": 3.472222222222222e-05,
77
+ "loss": 1.3115,
78
+ "step": 60
79
  },
80
  {
81
+ "epoch": 3.10625,
82
+ "eval_accuracy": 0.42857142857142855,
83
+ "eval_loss": 1.4629420042037964,
84
+ "eval_runtime": 1.2724,
85
+ "eval_samples_per_second": 11.003,
86
+ "eval_steps_per_second": 1.572,
87
+ "step": 68
88
  },
89
  {
90
+ "epoch": 4.0125,
91
+ "grad_norm": 6.042768955230713,
92
+ "learning_rate": 3.125e-05,
93
+ "loss": 1.1749,
94
+ "step": 70
95
  },
96
  {
97
+ "epoch": 4.075,
98
+ "grad_norm": 6.405767917633057,
99
  "learning_rate": 2.777777777777778e-05,
100
+ "loss": 1.0533,
101
+ "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  },
103
  {
104
+ "epoch": 4.10625,
105
+ "eval_accuracy": 0.2857142857142857,
106
+ "eval_loss": 1.5868721008300781,
107
+ "eval_runtime": 1.3163,
108
+ "eval_samples_per_second": 10.636,
109
+ "eval_steps_per_second": 1.519,
110
+ "step": 85
111
  },
112
  {
113
+ "epoch": 5.03125,
114
+ "grad_norm": 5.696847915649414,
115
+ "learning_rate": 2.4305555555555558e-05,
116
+ "loss": 1.1042,
117
+ "step": 90
118
  },
119
  {
120
+ "epoch": 5.09375,
121
+ "grad_norm": 9.622387886047363,
122
+ "learning_rate": 2.0833333333333336e-05,
123
+ "loss": 0.9616,
124
+ "step": 100
 
 
125
  },
126
  {
127
+ "epoch": 5.10625,
128
+ "eval_accuracy": 0.35714285714285715,
129
+ "eval_loss": 1.7206089496612549,
130
+ "eval_runtime": 1.3192,
131
+ "eval_samples_per_second": 10.612,
132
+ "eval_steps_per_second": 1.516,
133
+ "step": 102
134
  },
135
  {
136
+ "epoch": 6.05,
137
+ "grad_norm": 10.931577682495117,
138
+ "learning_rate": 1.736111111111111e-05,
139
+ "loss": 0.9531,
140
+ "step": 110
141
  },
142
  {
143
+ "epoch": 6.10625,
144
+ "eval_accuracy": 0.35714285714285715,
145
+ "eval_loss": 1.440622329711914,
146
+ "eval_runtime": 1.1712,
147
+ "eval_samples_per_second": 11.954,
148
+ "eval_steps_per_second": 1.708,
149
+ "step": 119
150
  },
151
  {
152
+ "epoch": 7.00625,
153
+ "grad_norm": 12.279280662536621,
154
+ "learning_rate": 1.388888888888889e-05,
155
+ "loss": 0.7034,
156
+ "step": 120
157
  },
158
  {
159
+ "epoch": 7.06875,
160
+ "grad_norm": 15.174863815307617,
161
+ "learning_rate": 1.0416666666666668e-05,
162
+ "loss": 0.6531,
163
+ "step": 130
164
  },
165
  {
166
+ "epoch": 7.10625,
167
+ "eval_accuracy": 0.5,
168
+ "eval_loss": 1.329153299331665,
169
+ "eval_runtime": 1.1705,
170
+ "eval_samples_per_second": 11.96,
171
+ "eval_steps_per_second": 1.709,
172
+ "step": 136
173
  },
174
  {
175
+ "epoch": 8.025,
176
+ "grad_norm": 12.259533882141113,
177
+ "learning_rate": 6.944444444444445e-06,
178
+ "loss": 0.6896,
179
+ "step": 140
180
  },
181
  {
182
+ "epoch": 8.0875,
183
+ "grad_norm": 14.428750991821289,
184
+ "learning_rate": 3.4722222222222224e-06,
185
+ "loss": 0.5778,
186
+ "step": 150
187
  },
188
  {
189
+ "epoch": 8.10625,
190
+ "eval_accuracy": 0.6428571428571429,
191
+ "eval_loss": 1.121216058731079,
192
+ "eval_runtime": 1.0823,
193
+ "eval_samples_per_second": 12.936,
194
+ "eval_steps_per_second": 1.848,
195
+ "step": 153
196
  },
197
  {
198
+ "epoch": 9.04375,
199
+ "grad_norm": 12.301226615905762,
200
  "learning_rate": 0.0,
201
+ "loss": 0.4835,
202
+ "step": 160
203
  },
204
  {
205
+ "epoch": 9.04375,
206
+ "eval_accuracy": 0.5714285714285714,
207
+ "eval_loss": 1.113793134689331,
208
+ "eval_runtime": 1.2842,
209
+ "eval_samples_per_second": 10.902,
210
+ "eval_steps_per_second": 1.557,
211
+ "step": 160
212
  },
213
  {
214
+ "epoch": 9.04375,
215
+ "step": 160,
216
+ "total_flos": 1.5277024347362427e+18,
217
+ "train_loss": 1.0279614835977555,
218
+ "train_runtime": 373.3169,
219
+ "train_samples_per_second": 3.429,
220
+ "train_steps_per_second": 0.429
221
  },
222
  {
223
+ "epoch": 9.04375,
224
+ "eval_accuracy": 0.5714285714285714,
225
+ "eval_loss": 1.1454538106918335,
226
+ "eval_runtime": 10.4142,
227
+ "eval_samples_per_second": 2.689,
228
+ "eval_steps_per_second": 0.384,
229
+ "step": 160
230
  },
231
  {
232
+ "epoch": 9.04375,
233
+ "eval_accuracy": 0.5714285714285714,
234
+ "eval_loss": 1.1454538106918335,
235
+ "eval_runtime": 3.2395,
236
+ "eval_samples_per_second": 8.643,
237
+ "eval_steps_per_second": 1.235,
238
+ "step": 160
239
  }
240
  ],
241
  "logging_steps": 10,
242
+ "max_steps": 160,
243
  "num_input_tokens_seen": 0,
244
  "num_train_epochs": 9223372036854775807,
245
  "save_steps": 500,
 
255
  "attributes": {}
256
  }
257
  },
258
+ "total_flos": 1.5277024347362427e+18,
259
  "train_batch_size": 8,
260
  "trial_name": null,
261
  "trial_params": null