dima806 commited on
Commit
a31cb86
1 Parent(s): d1e7c2f

Upload folder using huggingface_hub

Browse files
checkpoint-32650/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af479e8111ef4375572925dd0fe92ff9a598e337c328f0fde4bf3c4ab72659e5
3
  size 344211388
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57b5f4edd84d0939be14fc0d0c3144ca3b1b8e10556b1b5ec4bb8e6ccf46c541
3
  size 344211388
checkpoint-32650/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ca7cc4c72541a329040d9e6b30cd5329b5a53715196d5515bb6ab36bd9aa774
3
  size 688543237
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca6d5694df22ef3a59936b2a6f0e1aee07d4a90d876fb7cbb13d01f382350e68
3
  size 688543237
checkpoint-32650/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fd9228d06d24f0e09c32daf1dbf5a41bf5f327475cb7d2d081065a54ad1e3f8
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28fe0d0c7fc8a2d0e9e5cfd2629e9cad05774da5004b056a543b287dac11faa2
3
  size 627
checkpoint-32650/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 2.5461041927337646,
3
  "best_model_checkpoint": "car_models_image_detection/checkpoint-32650",
4
  "epoch": 10.0,
5
  "eval_steps": 500,
@@ -10,557 +10,557 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.15313935681470137,
13
- "grad_norm": 1.442165732383728,
14
- "learning_rate": 8.875766871165644e-06,
15
- "loss": 5.7501,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.30627871362940273,
20
- "grad_norm": 1.5294591188430786,
21
- "learning_rate": 8.737730061349693e-06,
22
- "loss": 5.66,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.45941807044410415,
27
- "grad_norm": 1.7971255779266357,
28
- "learning_rate": 8.599693251533743e-06,
29
- "loss": 5.5686,
30
  "step": 1500
31
  },
32
  {
33
  "epoch": 0.6125574272588055,
34
- "grad_norm": 1.7858527898788452,
35
- "learning_rate": 8.461656441717792e-06,
36
- "loss": 5.4714,
37
  "step": 2000
38
  },
39
  {
40
  "epoch": 0.7656967840735069,
41
- "grad_norm": 1.6970139741897583,
42
- "learning_rate": 8.32361963190184e-06,
43
- "loss": 5.3715,
44
  "step": 2500
45
  },
46
  {
47
  "epoch": 0.9188361408882083,
48
- "grad_norm": 1.9722312688827515,
49
- "learning_rate": 8.18558282208589e-06,
50
- "loss": 5.2844,
51
  "step": 3000
52
  },
53
  {
54
  "epoch": 1.0,
55
- "eval_accuracy": 0.20365025344993468,
56
- "eval_loss": 5.212795734405518,
57
- "eval_model_preparation_time": 0.0035,
58
- "eval_runtime": 948.361,
59
- "eval_samples_per_second": 73.431,
60
- "eval_steps_per_second": 9.179,
61
  "step": 3265
62
  },
63
  {
64
  "epoch": 1.0719754977029097,
65
- "grad_norm": 2.317599296569824,
66
- "learning_rate": 8.047546012269938e-06,
67
- "loss": 5.1866,
68
  "step": 3500
69
  },
70
  {
71
  "epoch": 1.225114854517611,
72
- "grad_norm": 1.872455358505249,
73
- "learning_rate": 7.909509202453989e-06,
74
- "loss": 5.1003,
75
  "step": 4000
76
  },
77
  {
78
  "epoch": 1.3782542113323124,
79
- "grad_norm": 2.192427396774292,
80
- "learning_rate": 7.771472392638037e-06,
81
- "loss": 5.0184,
82
  "step": 4500
83
  },
84
  {
85
  "epoch": 1.5313935681470139,
86
- "grad_norm": 2.1170101165771484,
87
- "learning_rate": 7.633435582822086e-06,
88
- "loss": 4.9414,
89
  "step": 5000
90
  },
91
  {
92
  "epoch": 1.6845329249617151,
93
- "grad_norm": 2.908296823501587,
94
- "learning_rate": 7.495398773006135e-06,
95
- "loss": 4.863,
96
  "step": 5500
97
  },
98
  {
99
  "epoch": 1.8376722817764164,
100
- "grad_norm": 3.4065804481506348,
101
- "learning_rate": 7.3573619631901846e-06,
102
- "loss": 4.779,
103
  "step": 6000
104
  },
105
  {
106
  "epoch": 1.9908116385911179,
107
- "grad_norm": 2.6088645458221436,
108
- "learning_rate": 7.219325153374233e-06,
109
- "loss": 4.7011,
110
  "step": 6500
111
  },
112
  {
113
  "epoch": 2.0,
114
- "eval_accuracy": 0.31753758669710935,
115
- "eval_loss": 4.691849231719971,
116
- "eval_model_preparation_time": 0.0035,
117
- "eval_runtime": 713.5751,
118
- "eval_samples_per_second": 97.592,
119
- "eval_steps_per_second": 12.199,
120
  "step": 6530
121
  },
122
  {
123
  "epoch": 2.1439509954058193,
124
- "grad_norm": 2.4476678371429443,
125
- "learning_rate": 7.081288343558283e-06,
126
- "loss": 4.5978,
127
  "step": 7000
128
  },
129
  {
130
  "epoch": 2.2970903522205206,
131
- "grad_norm": 3.1015167236328125,
132
- "learning_rate": 6.9432515337423315e-06,
133
- "loss": 4.53,
134
  "step": 7500
135
  },
136
  {
137
  "epoch": 2.450229709035222,
138
- "grad_norm": 3.4957149028778076,
139
- "learning_rate": 6.80521472392638e-06,
140
- "loss": 4.437,
141
  "step": 8000
142
  },
143
  {
144
  "epoch": 2.6033690658499236,
145
- "grad_norm": 3.372995138168335,
146
- "learning_rate": 6.66717791411043e-06,
147
- "loss": 4.3886,
148
  "step": 8500
149
  },
150
  {
151
  "epoch": 2.756508422664625,
152
- "grad_norm": 2.965057849884033,
153
- "learning_rate": 6.529141104294479e-06,
154
- "loss": 4.3013,
155
  "step": 9000
156
  },
157
  {
158
  "epoch": 2.909647779479326,
159
- "grad_norm": 3.4717111587524414,
160
- "learning_rate": 6.391104294478528e-06,
161
- "loss": 4.2275,
162
  "step": 9500
163
  },
164
  {
165
  "epoch": 3.0,
166
- "eval_accuracy": 0.3973635462887175,
167
- "eval_loss": 4.212672710418701,
168
- "eval_model_preparation_time": 0.0035,
169
- "eval_runtime": 702.2225,
170
- "eval_samples_per_second": 99.169,
171
- "eval_steps_per_second": 12.396,
172
  "step": 9795
173
  },
174
  {
175
  "epoch": 3.0627871362940278,
176
- "grad_norm": 3.6480553150177,
177
- "learning_rate": 6.253067484662577e-06,
178
- "loss": 4.1497,
179
  "step": 10000
180
  },
181
  {
182
  "epoch": 3.215926493108729,
183
- "grad_norm": 2.944525718688965,
184
- "learning_rate": 6.1150306748466255e-06,
185
- "loss": 4.0626,
186
  "step": 10500
187
  },
188
  {
189
  "epoch": 3.3690658499234303,
190
- "grad_norm": 3.6789698600769043,
191
- "learning_rate": 5.976993865030675e-06,
192
- "loss": 3.9861,
193
  "step": 11000
194
  },
195
  {
196
  "epoch": 3.522205206738132,
197
- "grad_norm": 3.7942678928375244,
198
- "learning_rate": 5.838957055214725e-06,
199
- "loss": 3.9302,
200
  "step": 11500
201
  },
202
  {
203
  "epoch": 3.6753445635528332,
204
- "grad_norm": 2.889557123184204,
205
- "learning_rate": 5.700920245398773e-06,
206
- "loss": 3.8667,
207
  "step": 12000
208
  },
209
  {
210
  "epoch": 3.8284839203675345,
211
- "grad_norm": 4.875265121459961,
212
- "learning_rate": 5.562883435582822e-06,
213
- "loss": 3.7844,
214
  "step": 12500
215
  },
216
  {
217
  "epoch": 3.9816232771822357,
218
- "grad_norm": 4.279688835144043,
219
- "learning_rate": 5.424846625766871e-06,
220
- "loss": 3.7406,
221
  "step": 13000
222
  },
223
  {
224
  "epoch": 4.0,
225
- "eval_accuracy": 0.4593833914904005,
226
- "eval_loss": 3.774059772491455,
227
- "eval_model_preparation_time": 0.0035,
228
- "eval_runtime": 705.2879,
229
- "eval_samples_per_second": 98.738,
230
- "eval_steps_per_second": 12.342,
231
  "step": 13060
232
  },
233
  {
234
  "epoch": 4.134762633996937,
235
- "grad_norm": 4.041619777679443,
236
- "learning_rate": 5.286809815950921e-06,
237
- "loss": 3.6478,
238
  "step": 13500
239
  },
240
  {
241
  "epoch": 4.287901990811639,
242
- "grad_norm": 5.805275917053223,
243
- "learning_rate": 5.14877300613497e-06,
244
- "loss": 3.58,
245
  "step": 14000
246
  },
247
  {
248
  "epoch": 4.44104134762634,
249
- "grad_norm": 3.922706365585327,
250
- "learning_rate": 5.010736196319019e-06,
251
- "loss": 3.543,
252
  "step": 14500
253
  },
254
  {
255
  "epoch": 4.594180704441041,
256
- "grad_norm": 3.550119638442993,
257
- "learning_rate": 4.872699386503067e-06,
258
- "loss": 3.4574,
259
  "step": 15000
260
  },
261
  {
262
  "epoch": 4.747320061255743,
263
- "grad_norm": 5.960729122161865,
264
- "learning_rate": 4.734662576687116e-06,
265
- "loss": 3.4116,
266
  "step": 15500
267
  },
268
  {
269
  "epoch": 4.900459418070444,
270
- "grad_norm": 4.563659191131592,
271
- "learning_rate": 4.5966257668711664e-06,
272
- "loss": 3.3544,
273
  "step": 16000
274
  },
275
  {
276
  "epoch": 5.0,
277
- "eval_accuracy": 0.5196800643317682,
278
- "eval_loss": 3.398108959197998,
279
- "eval_model_preparation_time": 0.0035,
280
- "eval_runtime": 700.4362,
281
- "eval_samples_per_second": 99.422,
282
- "eval_steps_per_second": 12.428,
283
  "step": 16325
284
  },
285
  {
286
  "epoch": 5.053598774885145,
287
- "grad_norm": 5.071287631988525,
288
- "learning_rate": 4.458588957055215e-06,
289
- "loss": 3.2835,
290
  "step": 16500
291
  },
292
  {
293
  "epoch": 5.206738131699847,
294
- "grad_norm": 5.6899003982543945,
295
- "learning_rate": 4.320552147239264e-06,
296
- "loss": 3.2203,
297
  "step": 17000
298
  },
299
  {
300
  "epoch": 5.359877488514548,
301
- "grad_norm": 5.455111980438232,
302
- "learning_rate": 4.1825153374233126e-06,
303
- "loss": 3.1822,
304
  "step": 17500
305
  },
306
  {
307
  "epoch": 5.51301684532925,
308
- "grad_norm": 7.294569969177246,
309
- "learning_rate": 4.044478527607362e-06,
310
- "loss": 3.1201,
311
  "step": 18000
312
  },
313
  {
314
  "epoch": 5.666156202143951,
315
- "grad_norm": 4.797060012817383,
316
- "learning_rate": 3.906441717791411e-06,
317
- "loss": 3.0764,
318
  "step": 18500
319
  },
320
  {
321
  "epoch": 5.819295558958652,
322
- "grad_norm": 9.175103187561035,
323
- "learning_rate": 3.7684049079754604e-06,
324
- "loss": 3.0317,
325
  "step": 19000
326
  },
327
  {
328
  "epoch": 5.972434915773354,
329
- "grad_norm": 5.574986934661865,
330
- "learning_rate": 3.630368098159509e-06,
331
- "loss": 3.0015,
332
  "step": 19500
333
  },
334
  {
335
  "epoch": 6.0,
336
- "eval_accuracy": 0.5613951952210686,
337
- "eval_loss": 3.0899131298065186,
338
- "eval_model_preparation_time": 0.0035,
339
- "eval_runtime": 705.6276,
340
- "eval_samples_per_second": 98.691,
341
- "eval_steps_per_second": 12.337,
342
  "step": 19590
343
  },
344
  {
345
  "epoch": 6.1255742725880555,
346
- "grad_norm": 6.0056352615356445,
347
- "learning_rate": 3.4923312883435583e-06,
348
- "loss": 2.921,
349
  "step": 20000
350
  },
351
  {
352
  "epoch": 6.278713629402756,
353
- "grad_norm": 12.546520233154297,
354
- "learning_rate": 3.3542944785276074e-06,
355
- "loss": 2.8971,
356
  "step": 20500
357
  },
358
  {
359
  "epoch": 6.431852986217458,
360
- "grad_norm": 4.947099208831787,
361
- "learning_rate": 3.2162576687116565e-06,
362
- "loss": 2.8553,
363
  "step": 21000
364
  },
365
  {
366
  "epoch": 6.584992343032159,
367
- "grad_norm": 8.054845809936523,
368
- "learning_rate": 3.0782208588957057e-06,
369
- "loss": 2.8151,
370
  "step": 21500
371
  },
372
  {
373
  "epoch": 6.738131699846861,
374
- "grad_norm": 6.752479076385498,
375
- "learning_rate": 2.940184049079755e-06,
376
- "loss": 2.7747,
377
  "step": 22000
378
  },
379
  {
380
  "epoch": 6.891271056661562,
381
- "grad_norm": 6.941985607147217,
382
- "learning_rate": 2.8021472392638035e-06,
383
- "loss": 2.7401,
384
  "step": 22500
385
  },
386
  {
387
  "epoch": 7.0,
388
- "eval_accuracy": 0.5933313229655797,
389
- "eval_loss": 2.853682279586792,
390
- "eval_model_preparation_time": 0.0035,
391
- "eval_runtime": 697.9759,
392
- "eval_samples_per_second": 99.773,
393
- "eval_steps_per_second": 12.472,
394
  "step": 22855
395
  },
396
  {
397
  "epoch": 7.044410413476263,
398
- "grad_norm": 4.710619926452637,
399
- "learning_rate": 2.664110429447853e-06,
400
- "loss": 2.7003,
401
  "step": 23000
402
  },
403
  {
404
  "epoch": 7.197549770290965,
405
- "grad_norm": 5.312741756439209,
406
- "learning_rate": 2.5260736196319018e-06,
407
- "loss": 2.6404,
408
  "step": 23500
409
  },
410
  {
411
  "epoch": 7.3506891271056665,
412
- "grad_norm": 6.6971845626831055,
413
- "learning_rate": 2.3880368098159513e-06,
414
- "loss": 2.6377,
415
  "step": 24000
416
  },
417
  {
418
  "epoch": 7.503828483920367,
419
- "grad_norm": 6.931722164154053,
420
- "learning_rate": 2.25e-06,
421
- "loss": 2.5981,
422
  "step": 24500
423
  },
424
  {
425
  "epoch": 7.656967840735069,
426
- "grad_norm": 7.1908040046691895,
427
- "learning_rate": 2.111963190184049e-06,
428
- "loss": 2.5702,
429
  "step": 25000
430
  },
431
  {
432
  "epoch": 7.810107197549771,
433
- "grad_norm": 5.63743257522583,
434
- "learning_rate": 1.9739263803680983e-06,
435
- "loss": 2.5676,
436
  "step": 25500
437
  },
438
  {
439
  "epoch": 7.9632465543644715,
440
- "grad_norm": 6.103708267211914,
441
- "learning_rate": 1.8358895705521473e-06,
442
- "loss": 2.5566,
443
  "step": 26000
444
  },
445
  {
446
  "epoch": 8.0,
447
- "eval_accuracy": 0.6183460417294907,
448
- "eval_loss": 2.6820664405822754,
449
- "eval_model_preparation_time": 0.0035,
450
- "eval_runtime": 747.6734,
451
- "eval_samples_per_second": 93.141,
452
- "eval_steps_per_second": 11.643,
453
  "step": 26120
454
  },
455
  {
456
  "epoch": 8.116385911179172,
457
- "grad_norm": 7.664525508880615,
458
- "learning_rate": 1.6978527607361964e-06,
459
- "loss": 2.4933,
460
  "step": 26500
461
  },
462
  {
463
  "epoch": 8.269525267993874,
464
- "grad_norm": 8.328133583068848,
465
- "learning_rate": 1.5598159509202455e-06,
466
- "loss": 2.4762,
467
  "step": 27000
468
  },
469
  {
470
  "epoch": 8.422664624808576,
471
- "grad_norm": 8.003011703491211,
472
- "learning_rate": 1.4217791411042944e-06,
473
- "loss": 2.4585,
474
  "step": 27500
475
  },
476
  {
477
  "epoch": 8.575803981623277,
478
- "grad_norm": 6.738711833953857,
479
- "learning_rate": 1.2837423312883436e-06,
480
- "loss": 2.4542,
481
  "step": 28000
482
  },
483
  {
484
  "epoch": 8.728943338437979,
485
- "grad_norm": 6.110630512237549,
486
- "learning_rate": 1.1457055214723925e-06,
487
- "loss": 2.4222,
488
  "step": 28500
489
  },
490
  {
491
  "epoch": 8.88208269525268,
492
- "grad_norm": 5.14086389541626,
493
- "learning_rate": 1.0076687116564419e-06,
494
- "loss": 2.4216,
495
  "step": 29000
496
  },
497
  {
498
  "epoch": 9.0,
499
- "eval_accuracy": 0.6327058113987851,
500
- "eval_loss": 2.581773042678833,
501
- "eval_model_preparation_time": 0.0035,
502
- "eval_runtime": 754.0093,
503
- "eval_samples_per_second": 92.358,
504
- "eval_steps_per_second": 11.545,
505
  "step": 29385
506
  },
507
  {
508
  "epoch": 9.03522205206738,
509
- "grad_norm": 7.1302971839904785,
510
- "learning_rate": 8.696319018404908e-07,
511
- "loss": 2.4115,
512
  "step": 29500
513
  },
514
  {
515
  "epoch": 9.188361408882082,
516
- "grad_norm": 5.568242073059082,
517
- "learning_rate": 7.315950920245399e-07,
518
- "loss": 2.3616,
519
  "step": 30000
520
  },
521
  {
522
  "epoch": 9.341500765696784,
523
- "grad_norm": 4.705562591552734,
524
- "learning_rate": 5.93558282208589e-07,
525
- "loss": 2.3858,
526
  "step": 30500
527
  },
528
  {
529
  "epoch": 9.494640122511486,
530
- "grad_norm": 6.591212749481201,
531
- "learning_rate": 4.5552147239263803e-07,
532
- "loss": 2.3935,
533
  "step": 31000
534
  },
535
  {
536
  "epoch": 9.647779479326188,
537
- "grad_norm": 7.563518524169922,
538
- "learning_rate": 3.174846625766871e-07,
539
- "loss": 2.3611,
540
  "step": 31500
541
  },
542
  {
543
  "epoch": 9.800918836140887,
544
- "grad_norm": 7.720861434936523,
545
- "learning_rate": 1.794478527607362e-07,
546
- "loss": 2.3667,
547
  "step": 32000
548
  },
549
  {
550
  "epoch": 9.95405819295559,
551
- "grad_norm": 5.801350116729736,
552
- "learning_rate": 4.141104294478528e-08,
553
- "loss": 2.3529,
554
  "step": 32500
555
  },
556
  {
557
  "epoch": 10.0,
558
- "eval_accuracy": 0.6372434986142822,
559
- "eval_loss": 2.5461041927337646,
560
- "eval_model_preparation_time": 0.0035,
561
- "eval_runtime": 700.2631,
562
- "eval_samples_per_second": 99.447,
563
- "eval_steps_per_second": 12.431,
564
  "step": 32650
565
  }
566
  ],
 
1
  {
2
+ "best_metric": 1.0125610828399658,
3
  "best_model_checkpoint": "car_models_image_detection/checkpoint-32650",
4
  "epoch": 10.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.15313935681470137,
13
+ "grad_norm": 1.452757477760315,
14
+ "learning_rate": 1.4792944785276074e-05,
15
+ "loss": 5.7276,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.30627871362940273,
20
+ "grad_norm": 1.6236835718154907,
21
+ "learning_rate": 1.4562883435582821e-05,
22
+ "loss": 5.577,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.45941807044410415,
27
+ "grad_norm": 1.6465808153152466,
28
+ "learning_rate": 1.433282208588957e-05,
29
+ "loss": 5.415,
30
  "step": 1500
31
  },
32
  {
33
  "epoch": 0.6125574272588055,
34
+ "grad_norm": 1.6408442258834839,
35
+ "learning_rate": 1.410276073619632e-05,
36
+ "loss": 5.2564,
37
  "step": 2000
38
  },
39
  {
40
  "epoch": 0.7656967840735069,
41
+ "grad_norm": 2.1702845096588135,
42
+ "learning_rate": 1.3872699386503068e-05,
43
+ "loss": 5.097,
44
  "step": 2500
45
  },
46
  {
47
  "epoch": 0.9188361408882083,
48
+ "grad_norm": 2.1283822059631348,
49
+ "learning_rate": 1.3642638036809815e-05,
50
+ "loss": 4.9628,
51
  "step": 3000
52
  },
53
  {
54
  "epoch": 1.0,
55
+ "eval_accuracy": 0.27259150763221757,
56
+ "eval_loss": 4.833654403686523,
57
+ "eval_model_preparation_time": 0.0031,
58
+ "eval_runtime": 685.3438,
59
+ "eval_samples_per_second": 101.612,
60
+ "eval_steps_per_second": 12.702,
61
  "step": 3265
62
  },
63
  {
64
  "epoch": 1.0719754977029097,
65
+ "grad_norm": 2.1697254180908203,
66
+ "learning_rate": 1.3412576687116564e-05,
67
+ "loss": 4.8042,
68
  "step": 3500
69
  },
70
  {
71
  "epoch": 1.225114854517611,
72
+ "grad_norm": 2.616407632827759,
73
+ "learning_rate": 1.3182515337423314e-05,
74
+ "loss": 4.6501,
75
  "step": 4000
76
  },
77
  {
78
  "epoch": 1.3782542113323124,
79
+ "grad_norm": 2.611531972885132,
80
+ "learning_rate": 1.2952453987730061e-05,
81
+ "loss": 4.4964,
82
  "step": 4500
83
  },
84
  {
85
  "epoch": 1.5313935681470139,
86
+ "grad_norm": 3.050095558166504,
87
+ "learning_rate": 1.272239263803681e-05,
88
+ "loss": 4.3653,
89
  "step": 5000
90
  },
91
  {
92
  "epoch": 1.6845329249617151,
93
+ "grad_norm": 3.209902048110962,
94
+ "learning_rate": 1.2492331288343558e-05,
95
+ "loss": 4.2269,
96
  "step": 5500
97
  },
98
  {
99
  "epoch": 1.8376722817764164,
100
+ "grad_norm": 3.8992629051208496,
101
+ "learning_rate": 1.2262269938650307e-05,
102
+ "loss": 4.0844,
103
  "step": 6000
104
  },
105
  {
106
  "epoch": 1.9908116385911179,
107
+ "grad_norm": 3.587782382965088,
108
+ "learning_rate": 1.2032208588957057e-05,
109
+ "loss": 3.9577,
110
  "step": 6500
111
  },
112
  {
113
  "epoch": 2.0,
114
+ "eval_accuracy": 0.432200347506426,
115
+ "eval_loss": 3.934602975845337,
116
+ "eval_model_preparation_time": 0.0031,
117
+ "eval_runtime": 682.9317,
118
+ "eval_samples_per_second": 101.971,
119
+ "eval_steps_per_second": 12.747,
120
  "step": 6530
121
  },
122
  {
123
  "epoch": 2.1439509954058193,
124
+ "grad_norm": 4.234837532043457,
125
+ "learning_rate": 1.1802147239263804e-05,
126
+ "loss": 3.7778,
127
  "step": 7000
128
  },
129
  {
130
  "epoch": 2.2970903522205206,
131
+ "grad_norm": 3.8425912857055664,
132
+ "learning_rate": 1.1572085889570552e-05,
133
+ "loss": 3.6515,
134
  "step": 7500
135
  },
136
  {
137
  "epoch": 2.450229709035222,
138
+ "grad_norm": 3.8768410682678223,
139
+ "learning_rate": 1.1342024539877301e-05,
140
+ "loss": 3.5171,
141
  "step": 8000
142
  },
143
  {
144
  "epoch": 2.6033690658499236,
145
+ "grad_norm": 4.20833683013916,
146
+ "learning_rate": 1.111196319018405e-05,
147
+ "loss": 3.3911,
148
  "step": 8500
149
  },
150
  {
151
  "epoch": 2.756508422664625,
152
+ "grad_norm": 4.394293785095215,
153
+ "learning_rate": 1.0881901840490798e-05,
154
+ "loss": 3.2882,
155
  "step": 9000
156
  },
157
  {
158
  "epoch": 2.909647779479326,
159
+ "grad_norm": 4.391124725341797,
160
+ "learning_rate": 1.0651840490797547e-05,
161
+ "loss": 3.1777,
162
  "step": 9500
163
  },
164
  {
165
  "epoch": 3.0,
166
+ "eval_accuracy": 0.5524059794080903,
167
+ "eval_loss": 3.125910520553589,
168
+ "eval_model_preparation_time": 0.0031,
169
+ "eval_runtime": 696.3871,
170
+ "eval_samples_per_second": 100.0,
171
+ "eval_steps_per_second": 12.5,
172
  "step": 9795
173
  },
174
  {
175
  "epoch": 3.0627871362940278,
176
+ "grad_norm": 4.910506248474121,
177
+ "learning_rate": 1.0421779141104295e-05,
178
+ "loss": 3.0296,
179
  "step": 10000
180
  },
181
  {
182
  "epoch": 3.215926493108729,
183
+ "grad_norm": 4.594521999359131,
184
+ "learning_rate": 1.0191717791411042e-05,
185
+ "loss": 2.8819,
186
  "step": 10500
187
  },
188
  {
189
  "epoch": 3.3690658499234303,
190
+ "grad_norm": 4.765926361083984,
191
+ "learning_rate": 9.961656441717793e-06,
192
+ "loss": 2.7684,
193
  "step": 11000
194
  },
195
  {
196
  "epoch": 3.522205206738132,
197
+ "grad_norm": 5.226629734039307,
198
+ "learning_rate": 9.73159509202454e-06,
199
+ "loss": 2.6588,
200
  "step": 11500
201
  },
202
  {
203
  "epoch": 3.6753445635528332,
204
+ "grad_norm": 5.948498725891113,
205
+ "learning_rate": 9.501533742331288e-06,
206
+ "loss": 2.5607,
207
  "step": 12000
208
  },
209
  {
210
  "epoch": 3.8284839203675345,
211
+ "grad_norm": 5.840036392211914,
212
+ "learning_rate": 9.271472392638038e-06,
213
+ "loss": 2.4664,
214
  "step": 12500
215
  },
216
  {
217
  "epoch": 3.9816232771822357,
218
+ "grad_norm": 6.542360782623291,
219
+ "learning_rate": 9.041411042944785e-06,
220
+ "loss": 2.3674,
221
  "step": 13000
222
  },
223
  {
224
  "epoch": 4.0,
225
+ "eval_accuracy": 0.6481712833326153,
226
+ "eval_loss": 2.4429006576538086,
227
+ "eval_model_preparation_time": 0.0031,
228
+ "eval_runtime": 700.3344,
229
+ "eval_samples_per_second": 99.437,
230
+ "eval_steps_per_second": 12.43,
231
  "step": 13060
232
  },
233
  {
234
  "epoch": 4.134762633996937,
235
+ "grad_norm": 5.998870372772217,
236
+ "learning_rate": 8.811349693251534e-06,
237
+ "loss": 2.2206,
238
  "step": 13500
239
  },
240
  {
241
  "epoch": 4.287901990811639,
242
+ "grad_norm": 5.736461639404297,
243
+ "learning_rate": 8.581288343558282e-06,
244
+ "loss": 2.1328,
245
  "step": 14000
246
  },
247
  {
248
  "epoch": 4.44104134762634,
249
+ "grad_norm": 8.815576553344727,
250
+ "learning_rate": 8.351226993865031e-06,
251
+ "loss": 2.0378,
252
  "step": 14500
253
  },
254
  {
255
  "epoch": 4.594180704441041,
256
+ "grad_norm": 6.433719635009766,
257
+ "learning_rate": 8.121165644171779e-06,
258
+ "loss": 2.0028,
259
  "step": 15000
260
  },
261
  {
262
  "epoch": 4.747320061255743,
263
+ "grad_norm": 6.188543796539307,
264
+ "learning_rate": 7.891104294478526e-06,
265
+ "loss": 1.8992,
266
  "step": 15500
267
  },
268
  {
269
  "epoch": 4.900459418070444,
270
+ "grad_norm": 4.191030979156494,
271
+ "learning_rate": 7.661042944785277e-06,
272
+ "loss": 1.8401,
273
  "step": 16000
274
  },
275
  {
276
  "epoch": 5.0,
277
+ "eval_accuracy": 0.7155042433119373,
278
+ "eval_loss": 1.9256677627563477,
279
+ "eval_model_preparation_time": 0.0031,
280
+ "eval_runtime": 697.1043,
281
+ "eval_samples_per_second": 99.898,
282
+ "eval_steps_per_second": 12.487,
283
  "step": 16325
284
  },
285
  {
286
  "epoch": 5.053598774885145,
287
+ "grad_norm": 6.046599388122559,
288
+ "learning_rate": 7.430981595092025e-06,
289
+ "loss": 1.749,
290
  "step": 16500
291
  },
292
  {
293
  "epoch": 5.206738131699847,
294
+ "grad_norm": 7.202089309692383,
295
+ "learning_rate": 7.200920245398773e-06,
296
+ "loss": 1.6562,
297
  "step": 17000
298
  },
299
  {
300
  "epoch": 5.359877488514548,
301
+ "grad_norm": 8.078461647033691,
302
+ "learning_rate": 6.970858895705521e-06,
303
+ "loss": 1.5944,
304
  "step": 17500
305
  },
306
  {
307
  "epoch": 5.51301684532925,
308
+ "grad_norm": 8.001608848571777,
309
+ "learning_rate": 6.74079754601227e-06,
310
+ "loss": 1.517,
311
  "step": 18000
312
  },
313
  {
314
  "epoch": 5.666156202143951,
315
+ "grad_norm": 7.847782135009766,
316
+ "learning_rate": 6.510736196319019e-06,
317
+ "loss": 1.4779,
318
  "step": 18500
319
  },
320
  {
321
  "epoch": 5.819295558958652,
322
+ "grad_norm": 8.12964153289795,
323
+ "learning_rate": 6.280674846625767e-06,
324
+ "loss": 1.4133,
325
  "step": 19000
326
  },
327
  {
328
  "epoch": 5.972434915773354,
329
+ "grad_norm": 5.552104473114014,
330
+ "learning_rate": 6.0506134969325155e-06,
331
+ "loss": 1.375,
332
  "step": 19500
333
  },
334
  {
335
  "epoch": 6.0,
336
+ "eval_accuracy": 0.7652464854463734,
337
+ "eval_loss": 1.5537763833999634,
338
+ "eval_model_preparation_time": 0.0031,
339
+ "eval_runtime": 693.8343,
340
+ "eval_samples_per_second": 100.368,
341
+ "eval_steps_per_second": 12.546,
342
  "step": 19590
343
  },
344
  {
345
  "epoch": 6.1255742725880555,
346
+ "grad_norm": 7.002144813537598,
347
+ "learning_rate": 5.820552147239264e-06,
348
+ "loss": 1.292,
349
  "step": 20000
350
  },
351
  {
352
  "epoch": 6.278713629402756,
353
+ "grad_norm": 8.652531623840332,
354
+ "learning_rate": 5.590490797546012e-06,
355
+ "loss": 1.2451,
356
  "step": 20500
357
  },
358
  {
359
  "epoch": 6.431852986217458,
360
+ "grad_norm": 6.782195568084717,
361
+ "learning_rate": 5.360429447852761e-06,
362
+ "loss": 1.199,
363
  "step": 21000
364
  },
365
  {
366
  "epoch": 6.584992343032159,
367
+ "grad_norm": 7.46057653427124,
368
+ "learning_rate": 5.13036809815951e-06,
369
+ "loss": 1.1784,
370
  "step": 21500
371
  },
372
  {
373
  "epoch": 6.738131699846861,
374
+ "grad_norm": 9.419745445251465,
375
+ "learning_rate": 4.900306748466258e-06,
376
+ "loss": 1.1378,
377
  "step": 22000
378
  },
379
  {
380
  "epoch": 6.891271056661562,
381
+ "grad_norm": 10.585297584533691,
382
+ "learning_rate": 4.670245398773006e-06,
383
+ "loss": 1.1101,
384
  "step": 22500
385
  },
386
  {
387
  "epoch": 7.0,
388
+ "eval_accuracy": 0.8005858786025072,
389
+ "eval_loss": 1.3030942678451538,
390
+ "eval_model_preparation_time": 0.0031,
391
+ "eval_runtime": 695.6789,
392
+ "eval_samples_per_second": 100.102,
393
+ "eval_steps_per_second": 12.513,
394
  "step": 22855
395
  },
396
  {
397
  "epoch": 7.044410413476263,
398
+ "grad_norm": 6.227669715881348,
399
+ "learning_rate": 4.440184049079755e-06,
400
+ "loss": 1.0649,
401
  "step": 23000
402
  },
403
  {
404
  "epoch": 7.197549770290965,
405
+ "grad_norm": 6.01344108581543,
406
+ "learning_rate": 4.210122699386503e-06,
407
+ "loss": 0.9895,
408
  "step": 23500
409
  },
410
  {
411
  "epoch": 7.3506891271056665,
412
+ "grad_norm": 7.48536491394043,
413
+ "learning_rate": 3.980061349693252e-06,
414
+ "loss": 0.9931,
415
  "step": 24000
416
  },
417
  {
418
  "epoch": 7.503828483920367,
419
+ "grad_norm": 4.995929718017578,
420
+ "learning_rate": 3.75e-06,
421
+ "loss": 0.9504,
422
  "step": 24500
423
  },
424
  {
425
  "epoch": 7.656967840735069,
426
+ "grad_norm": 7.199100017547607,
427
+ "learning_rate": 3.5199386503067485e-06,
428
+ "loss": 0.9414,
429
  "step": 25000
430
  },
431
  {
432
  "epoch": 7.810107197549771,
433
+ "grad_norm": 10.500569343566895,
434
+ "learning_rate": 3.2898773006134974e-06,
435
+ "loss": 0.9215,
436
  "step": 25500
437
  },
438
  {
439
  "epoch": 7.9632465543644715,
440
+ "grad_norm": 12.734477043151855,
441
+ "learning_rate": 3.0598159509202454e-06,
442
+ "loss": 0.9171,
443
  "step": 26000
444
  },
445
  {
446
  "epoch": 8.0,
447
+ "eval_accuracy": 0.8239205043151108,
448
+ "eval_loss": 1.1368967294692993,
449
+ "eval_model_preparation_time": 0.0031,
450
+ "eval_runtime": 700.1379,
451
+ "eval_samples_per_second": 99.465,
452
+ "eval_steps_per_second": 12.433,
453
  "step": 26120
454
  },
455
  {
456
  "epoch": 8.116385911179172,
457
+ "grad_norm": 5.377818584442139,
458
+ "learning_rate": 2.8297546012269938e-06,
459
+ "loss": 0.877,
460
  "step": 26500
461
  },
462
  {
463
  "epoch": 8.269525267993874,
464
+ "grad_norm": 8.250605583190918,
465
+ "learning_rate": 2.599693251533742e-06,
466
+ "loss": 0.8307,
467
  "step": 27000
468
  },
469
  {
470
  "epoch": 8.422664624808576,
471
+ "grad_norm": 6.766539096832275,
472
+ "learning_rate": 2.369631901840491e-06,
473
+ "loss": 0.8152,
474
  "step": 27500
475
  },
476
  {
477
  "epoch": 8.575803981623277,
478
+ "grad_norm": 8.64637565612793,
479
+ "learning_rate": 2.1395705521472395e-06,
480
+ "loss": 0.8146,
481
  "step": 28000
482
  },
483
  {
484
  "epoch": 8.728943338437979,
485
+ "grad_norm": 7.545835494995117,
486
+ "learning_rate": 1.9095092024539874e-06,
487
+ "loss": 0.8026,
488
  "step": 28500
489
  },
490
  {
491
  "epoch": 8.88208269525268,
492
+ "grad_norm": 7.024548053741455,
493
+ "learning_rate": 1.6794478527607363e-06,
494
+ "loss": 0.7875,
495
  "step": 29000
496
  },
497
  {
498
  "epoch": 9.0,
499
+ "eval_accuracy": 0.8368155774781373,
500
+ "eval_loss": 1.044851541519165,
501
+ "eval_model_preparation_time": 0.0031,
502
+ "eval_runtime": 688.1062,
503
+ "eval_samples_per_second": 101.204,
504
+ "eval_steps_per_second": 12.651,
505
  "step": 29385
506
  },
507
  {
508
  "epoch": 9.03522205206738,
509
+ "grad_norm": 6.825665473937988,
510
+ "learning_rate": 1.4493865030674847e-06,
511
+ "loss": 0.7609,
512
  "step": 29500
513
  },
514
  {
515
  "epoch": 9.188361408882082,
516
+ "grad_norm": 8.946706771850586,
517
+ "learning_rate": 1.2193251533742331e-06,
518
+ "loss": 0.7448,
519
  "step": 30000
520
  },
521
  {
522
  "epoch": 9.341500765696784,
523
+ "grad_norm": 5.960868835449219,
524
+ "learning_rate": 9.892638036809815e-07,
525
+ "loss": 0.7412,
526
  "step": 30500
527
  },
528
  {
529
  "epoch": 9.494640122511486,
530
+ "grad_norm": 8.354199409484863,
531
+ "learning_rate": 7.592024539877301e-07,
532
+ "loss": 0.7391,
533
  "step": 31000
534
  },
535
  {
536
  "epoch": 9.647779479326188,
537
+ "grad_norm": 7.826401233673096,
538
+ "learning_rate": 5.291411042944785e-07,
539
+ "loss": 0.7451,
540
  "step": 31500
541
  },
542
  {
543
  "epoch": 9.800918836140887,
544
+ "grad_norm": 6.866321563720703,
545
+ "learning_rate": 2.99079754601227e-07,
546
+ "loss": 0.7328,
547
  "step": 32000
548
  },
549
  {
550
  "epoch": 9.95405819295559,
551
+ "grad_norm": 8.034021377563477,
552
+ "learning_rate": 6.901840490797547e-08,
553
+ "loss": 0.7235,
554
  "step": 32500
555
  },
556
  {
557
  "epoch": 10.0,
558
+ "eval_accuracy": 0.8410229899912406,
559
+ "eval_loss": 1.0125610828399658,
560
+ "eval_model_preparation_time": 0.0031,
561
+ "eval_runtime": 702.1203,
562
+ "eval_samples_per_second": 99.184,
563
+ "eval_steps_per_second": 12.398,
564
  "step": 32650
565
  }
566
  ],
checkpoint-32650/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e6d4fb80b458c99c9714ea41fb1e577e8ccfede2bf1093e2526b5a39cd83447
3
  size 4795
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1be005c2ee4135e2fbecdbef74a8ceaff175c6010baa4c959e76dc86ecc700a3
3
  size 4795
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af479e8111ef4375572925dd0fe92ff9a598e337c328f0fde4bf3c4ab72659e5
3
  size 344211388
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57b5f4edd84d0939be14fc0d0c3144ca3b1b8e10556b1b5ec4bb8e6ccf46c541
3
  size 344211388
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e6d4fb80b458c99c9714ea41fb1e577e8ccfede2bf1093e2526b5a39cd83447
3
  size 4795
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1be005c2ee4135e2fbecdbef74a8ceaff175c6010baa4c959e76dc86ecc700a3
3
  size 4795