anoaky commited on
Commit
3bdb2c7
·
verified ·
1 Parent(s): 24952fb

Training in progress, epoch 2, checkpoint

Browse files
checkpoint-1110/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c6f4bf1aa355bc7e15ea5973efde11a86297856278437b219a2af0ccb2071ef
3
  size 1629432864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:247a16d9214bc118326832045811cbf5330f1d891eb8102b813f14ad0df25397
3
  size 1629432864
checkpoint-1110/trainer_state.json CHANGED
@@ -10,801 +10,809 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.018026137899954935,
13
- "grad_norm": 21.723926544189453,
14
  "learning_rate": 4.981949458483755e-05,
15
- "loss": 0.8104,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.03605227579990987,
20
- "grad_norm": 55.4254035949707,
21
  "learning_rate": 4.963898916967509e-05,
22
- "loss": 0.5178,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.054078413699864804,
27
- "grad_norm": 41.4888801574707,
28
  "learning_rate": 4.945848375451264e-05,
29
- "loss": 0.5092,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.07210455159981974,
34
- "grad_norm": 25.77608299255371,
35
  "learning_rate": 4.927797833935018e-05,
36
- "loss": 0.5581,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.09013068949977468,
41
- "grad_norm": 61.5268669128418,
42
  "learning_rate": 4.909747292418773e-05,
43
- "loss": 0.5132,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.10815682739972961,
48
- "grad_norm": 18.218244552612305,
49
  "learning_rate": 4.891696750902527e-05,
50
- "loss": 0.4614,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.12618296529968454,
55
- "grad_norm": 30.51982879638672,
56
  "learning_rate": 4.873646209386282e-05,
57
- "loss": 0.4648,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.14420910319963948,
62
- "grad_norm": 20.707260131835938,
63
  "learning_rate": 4.855595667870036e-05,
64
- "loss": 0.4317,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.16223524109959442,
69
- "grad_norm": 21.699357986450195,
70
  "learning_rate": 4.837545126353791e-05,
71
- "loss": 0.4226,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.18026137899954936,
76
- "grad_norm": 51.80731201171875,
77
  "learning_rate": 4.819494584837546e-05,
78
- "loss": 0.5002,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.19828751689950427,
83
- "grad_norm": 44.694942474365234,
84
  "learning_rate": 4.8014440433213e-05,
85
- "loss": 0.4933,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 0.21631365479945922,
90
- "grad_norm": 22.704530715942383,
91
  "learning_rate": 4.783393501805055e-05,
92
- "loss": 0.4399,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 0.23433979269941416,
97
- "grad_norm": 26.04544448852539,
98
  "learning_rate": 4.765342960288809e-05,
99
- "loss": 0.438,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 0.25236593059936907,
104
- "grad_norm": 14.642335891723633,
105
  "learning_rate": 4.747292418772563e-05,
106
- "loss": 0.4591,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 0.270392068499324,
111
- "grad_norm": 16.13884162902832,
112
  "learning_rate": 4.7292418772563177e-05,
113
- "loss": 0.4546,
114
  "step": 150
115
  },
116
  {
117
  "epoch": 0.28841820639927895,
118
- "grad_norm": 29.33772087097168,
119
  "learning_rate": 4.711191335740072e-05,
120
- "loss": 0.4665,
121
  "step": 160
122
  },
123
  {
124
  "epoch": 0.3064443442992339,
125
- "grad_norm": 15.900433540344238,
126
  "learning_rate": 4.693140794223827e-05,
127
- "loss": 0.4703,
128
  "step": 170
129
  },
130
  {
131
  "epoch": 0.32447048219918884,
132
- "grad_norm": 17.246356964111328,
133
  "learning_rate": 4.675090252707581e-05,
134
- "loss": 0.426,
135
  "step": 180
136
  },
137
  {
138
  "epoch": 0.3424966200991438,
139
- "grad_norm": 25.457477569580078,
140
  "learning_rate": 4.657039711191336e-05,
141
- "loss": 0.4711,
142
  "step": 190
143
  },
144
  {
145
  "epoch": 0.3605227579990987,
146
- "grad_norm": 25.43963050842285,
147
  "learning_rate": 4.63898916967509e-05,
148
- "loss": 0.4547,
149
  "step": 200
150
  },
151
  {
152
  "epoch": 0.3785488958990536,
153
- "grad_norm": 32.49702072143555,
154
  "learning_rate": 4.620938628158845e-05,
155
- "loss": 0.4603,
156
  "step": 210
157
  },
158
  {
159
  "epoch": 0.39657503379900855,
160
- "grad_norm": 52.78520965576172,
161
  "learning_rate": 4.602888086642599e-05,
162
- "loss": 0.4471,
163
  "step": 220
164
  },
165
  {
166
  "epoch": 0.4146011716989635,
167
- "grad_norm": 16.70362663269043,
168
  "learning_rate": 4.584837545126354e-05,
169
- "loss": 0.4393,
170
  "step": 230
171
  },
172
  {
173
  "epoch": 0.43262730959891843,
174
- "grad_norm": 23.55246353149414,
175
  "learning_rate": 4.566787003610109e-05,
176
- "loss": 0.4256,
177
  "step": 240
178
  },
179
  {
180
  "epoch": 0.45065344749887337,
181
- "grad_norm": 18.868804931640625,
182
  "learning_rate": 4.548736462093863e-05,
183
- "loss": 0.4324,
184
  "step": 250
185
  },
186
  {
187
  "epoch": 0.4686795853988283,
188
- "grad_norm": 19.074827194213867,
189
  "learning_rate": 4.530685920577618e-05,
190
- "loss": 0.4419,
191
  "step": 260
192
  },
193
  {
194
  "epoch": 0.48670572329878325,
195
- "grad_norm": 29.232803344726562,
196
  "learning_rate": 4.5126353790613716e-05,
197
- "loss": 0.4179,
198
  "step": 270
199
  },
200
  {
201
  "epoch": 0.5047318611987381,
202
- "grad_norm": 17.780059814453125,
203
  "learning_rate": 4.494584837545127e-05,
204
- "loss": 0.3917,
205
  "step": 280
206
  },
207
  {
208
  "epoch": 0.5227579990986931,
209
- "grad_norm": 13.99399471282959,
210
  "learning_rate": 4.4765342960288806e-05,
211
- "loss": 0.4321,
212
  "step": 290
213
  },
214
  {
215
  "epoch": 0.540784136998648,
216
- "grad_norm": 14.700942039489746,
217
  "learning_rate": 4.458483754512636e-05,
218
- "loss": 0.4163,
219
  "step": 300
220
  },
221
  {
222
  "epoch": 0.558810274898603,
223
- "grad_norm": 17.43704605102539,
224
  "learning_rate": 4.44043321299639e-05,
225
- "loss": 0.4268,
226
  "step": 310
227
  },
228
  {
229
  "epoch": 0.5768364127985579,
230
- "grad_norm": 28.500301361083984,
231
  "learning_rate": 4.422382671480145e-05,
232
- "loss": 0.4229,
233
  "step": 320
234
  },
235
  {
236
  "epoch": 0.5948625506985128,
237
- "grad_norm": 36.14088439941406,
238
  "learning_rate": 4.404332129963899e-05,
239
- "loss": 0.4559,
240
  "step": 330
241
  },
242
  {
243
  "epoch": 0.6128886885984678,
244
- "grad_norm": 15.279230117797852,
245
  "learning_rate": 4.386281588447654e-05,
246
- "loss": 0.4067,
247
  "step": 340
248
  },
249
  {
250
  "epoch": 0.6309148264984227,
251
- "grad_norm": 21.569740295410156,
252
  "learning_rate": 4.368231046931408e-05,
253
- "loss": 0.4298,
254
  "step": 350
255
  },
256
  {
257
  "epoch": 0.6489409643983777,
258
- "grad_norm": 20.630115509033203,
259
  "learning_rate": 4.350180505415163e-05,
260
- "loss": 0.4173,
261
  "step": 360
262
  },
263
  {
264
  "epoch": 0.6669671022983326,
265
- "grad_norm": 15.515020370483398,
266
  "learning_rate": 4.332129963898917e-05,
267
- "loss": 0.4063,
268
  "step": 370
269
  },
270
  {
271
  "epoch": 0.6849932401982876,
272
- "grad_norm": 16.33180046081543,
273
  "learning_rate": 4.314079422382672e-05,
274
- "loss": 0.3651,
275
  "step": 380
276
  },
277
  {
278
  "epoch": 0.7030193780982424,
279
- "grad_norm": 16.77664566040039,
280
  "learning_rate": 4.296028880866426e-05,
281
- "loss": 0.3879,
282
  "step": 390
283
  },
284
  {
285
  "epoch": 0.7210455159981974,
286
- "grad_norm": 11.113824844360352,
287
  "learning_rate": 4.277978339350181e-05,
288
- "loss": 0.4205,
289
  "step": 400
290
  },
291
  {
292
  "epoch": 0.7390716538981523,
293
- "grad_norm": 17.835678100585938,
294
  "learning_rate": 4.259927797833935e-05,
295
- "loss": 0.4466,
296
  "step": 410
297
  },
298
  {
299
  "epoch": 0.7570977917981072,
300
- "grad_norm": 24.48379135131836,
301
  "learning_rate": 4.24187725631769e-05,
302
- "loss": 0.4084,
303
  "step": 420
304
  },
305
  {
306
  "epoch": 0.7751239296980622,
307
- "grad_norm": 29.183530807495117,
308
  "learning_rate": 4.223826714801444e-05,
309
- "loss": 0.3588,
310
  "step": 430
311
  },
312
  {
313
  "epoch": 0.7931500675980171,
314
- "grad_norm": 16.398386001586914,
315
  "learning_rate": 4.205776173285199e-05,
316
- "loss": 0.4594,
317
  "step": 440
318
  },
319
  {
320
  "epoch": 0.8111762054979721,
321
- "grad_norm": 24.11660385131836,
322
  "learning_rate": 4.187725631768953e-05,
323
- "loss": 0.376,
324
  "step": 450
325
  },
326
  {
327
  "epoch": 0.829202343397927,
328
- "grad_norm": 21.480924606323242,
329
  "learning_rate": 4.169675090252708e-05,
330
- "loss": 0.4035,
331
  "step": 460
332
  },
333
  {
334
  "epoch": 0.847228481297882,
335
- "grad_norm": 11.91422176361084,
336
  "learning_rate": 4.151624548736462e-05,
337
- "loss": 0.4074,
338
  "step": 470
339
  },
340
  {
341
  "epoch": 0.8652546191978369,
342
- "grad_norm": 17.17136001586914,
343
  "learning_rate": 4.1335740072202167e-05,
344
- "loss": 0.4259,
345
  "step": 480
346
  },
347
  {
348
  "epoch": 0.8832807570977917,
349
- "grad_norm": 15.024781227111816,
350
  "learning_rate": 4.115523465703972e-05,
351
- "loss": 0.413,
352
  "step": 490
353
  },
354
  {
355
  "epoch": 0.9013068949977467,
356
- "grad_norm": 15.877848625183105,
357
  "learning_rate": 4.0974729241877256e-05,
358
- "loss": 0.4013,
359
  "step": 500
360
  },
361
  {
362
  "epoch": 0.9193330328977016,
363
- "grad_norm": 20.711393356323242,
364
  "learning_rate": 4.079422382671481e-05,
365
- "loss": 0.3875,
366
  "step": 510
367
  },
368
  {
369
  "epoch": 0.9373591707976566,
370
- "grad_norm": 14.360701560974121,
371
  "learning_rate": 4.0613718411552346e-05,
372
- "loss": 0.4025,
373
  "step": 520
374
  },
375
  {
376
  "epoch": 0.9553853086976115,
377
- "grad_norm": 16.94271469116211,
378
  "learning_rate": 4.043321299638989e-05,
379
- "loss": 0.3971,
380
  "step": 530
381
  },
382
  {
383
  "epoch": 0.9734114465975665,
384
- "grad_norm": 23.218050003051758,
385
  "learning_rate": 4.0252707581227436e-05,
386
- "loss": 0.4305,
387
  "step": 540
388
  },
389
  {
390
  "epoch": 0.9914375844975214,
391
- "grad_norm": 25.439455032348633,
392
  "learning_rate": 4.007220216606498e-05,
393
- "loss": 0.3969,
394
  "step": 550
395
  },
396
  {
397
  "epoch": 1.0,
398
- "eval_f1": 0.8410362694300518,
399
- "eval_loss": 0.36654186248779297,
400
- "eval_precision": 0.7994483845547675,
401
- "eval_recall": 0.8871884564932225,
402
- "eval_runtime": 77.9824,
403
- "eval_samples_per_second": 59.924,
404
- "eval_steps_per_second": 7.502,
 
 
 
 
405
  "step": 555
406
  },
407
  {
408
  "epoch": 1.0090130689499774,
409
- "grad_norm": 23.230247497558594,
410
  "learning_rate": 3.989169675090253e-05,
411
- "loss": 0.3393,
412
  "step": 560
413
  },
414
  {
415
  "epoch": 1.0270392068499323,
416
- "grad_norm": 20.747711181640625,
417
  "learning_rate": 3.971119133574007e-05,
418
- "loss": 0.2951,
419
  "step": 570
420
  },
421
  {
422
  "epoch": 1.0450653447498874,
423
- "grad_norm": 16.3277645111084,
424
  "learning_rate": 3.953068592057762e-05,
425
- "loss": 0.3178,
426
  "step": 580
427
  },
428
  {
429
  "epoch": 1.0630914826498423,
430
- "grad_norm": 27.63875389099121,
431
  "learning_rate": 3.935018050541516e-05,
432
- "loss": 0.3043,
433
  "step": 590
434
  },
435
  {
436
  "epoch": 1.0811176205497972,
437
- "grad_norm": 24.972593307495117,
438
  "learning_rate": 3.916967509025271e-05,
439
- "loss": 0.3521,
440
  "step": 600
441
  },
442
  {
443
  "epoch": 1.099143758449752,
444
- "grad_norm": 22.260204315185547,
445
  "learning_rate": 3.898916967509025e-05,
446
- "loss": 0.2859,
447
  "step": 610
448
  },
449
  {
450
  "epoch": 1.117169896349707,
451
- "grad_norm": 18.97205924987793,
452
  "learning_rate": 3.88086642599278e-05,
453
- "loss": 0.3159,
454
  "step": 620
455
  },
456
  {
457
  "epoch": 1.135196034249662,
458
- "grad_norm": 25.5858097076416,
459
  "learning_rate": 3.862815884476535e-05,
460
- "loss": 0.2881,
461
  "step": 630
462
  },
463
  {
464
  "epoch": 1.153222172149617,
465
- "grad_norm": 16.255144119262695,
466
  "learning_rate": 3.844765342960289e-05,
467
- "loss": 0.356,
468
  "step": 640
469
  },
470
  {
471
  "epoch": 1.1712483100495719,
472
- "grad_norm": 16.83954429626465,
473
  "learning_rate": 3.826714801444044e-05,
474
- "loss": 0.2792,
475
  "step": 650
476
  },
477
  {
478
  "epoch": 1.1892744479495267,
479
- "grad_norm": 13.091219902038574,
480
  "learning_rate": 3.8086642599277976e-05,
481
- "loss": 0.3232,
482
  "step": 660
483
  },
484
  {
485
  "epoch": 1.2073005858494819,
486
- "grad_norm": 17.03887367248535,
487
  "learning_rate": 3.790613718411553e-05,
488
- "loss": 0.3081,
489
  "step": 670
490
  },
491
  {
492
  "epoch": 1.2253267237494367,
493
- "grad_norm": 24.6894588470459,
494
  "learning_rate": 3.7725631768953066e-05,
495
- "loss": 0.29,
496
  "step": 680
497
  },
498
  {
499
  "epoch": 1.2433528616493916,
500
- "grad_norm": 23.55462646484375,
501
  "learning_rate": 3.754512635379062e-05,
502
- "loss": 0.3232,
503
  "step": 690
504
  },
505
  {
506
  "epoch": 1.2613789995493465,
507
- "grad_norm": 44.3718147277832,
508
  "learning_rate": 3.7364620938628155e-05,
509
- "loss": 0.2878,
510
  "step": 700
511
  },
512
  {
513
  "epoch": 1.2794051374493014,
514
- "grad_norm": 21.339521408081055,
515
  "learning_rate": 3.718411552346571e-05,
516
- "loss": 0.2625,
517
  "step": 710
518
  },
519
  {
520
  "epoch": 1.2974312753492563,
521
- "grad_norm": 22.27944564819336,
522
  "learning_rate": 3.700361010830325e-05,
523
- "loss": 0.3261,
524
  "step": 720
525
  },
526
  {
527
  "epoch": 1.3154574132492114,
528
- "grad_norm": 20.857757568359375,
529
  "learning_rate": 3.68231046931408e-05,
530
- "loss": 0.3194,
531
  "step": 730
532
  },
533
  {
534
  "epoch": 1.3334835511491663,
535
- "grad_norm": 15.615569114685059,
536
  "learning_rate": 3.664259927797834e-05,
537
- "loss": 0.3099,
538
  "step": 740
539
  },
540
  {
541
  "epoch": 1.3515096890491212,
542
- "grad_norm": 13.210846900939941,
543
  "learning_rate": 3.646209386281589e-05,
544
- "loss": 0.2961,
545
  "step": 750
546
  },
547
  {
548
  "epoch": 1.3695358269490763,
549
- "grad_norm": 20.196102142333984,
550
  "learning_rate": 3.628158844765343e-05,
551
- "loss": 0.3099,
552
  "step": 760
553
  },
554
  {
555
  "epoch": 1.3875619648490312,
556
- "grad_norm": 14.043251991271973,
557
  "learning_rate": 3.610108303249098e-05,
558
- "loss": 0.3548,
559
  "step": 770
560
  },
561
  {
562
  "epoch": 1.405588102748986,
563
- "grad_norm": 31.82477569580078,
564
  "learning_rate": 3.592057761732852e-05,
565
- "loss": 0.3255,
566
  "step": 780
567
  },
568
  {
569
  "epoch": 1.423614240648941,
570
- "grad_norm": 19.933557510375977,
571
  "learning_rate": 3.574007220216607e-05,
572
- "loss": 0.3404,
573
  "step": 790
574
  },
575
  {
576
  "epoch": 1.4416403785488958,
577
- "grad_norm": 27.846755981445312,
578
  "learning_rate": 3.555956678700361e-05,
579
- "loss": 0.3054,
580
  "step": 800
581
  },
582
  {
583
  "epoch": 1.4596665164488507,
584
- "grad_norm": 18.607879638671875,
585
  "learning_rate": 3.537906137184116e-05,
586
- "loss": 0.2802,
587
  "step": 810
588
  },
589
  {
590
  "epoch": 1.4776926543488058,
591
- "grad_norm": 29.801652908325195,
592
  "learning_rate": 3.51985559566787e-05,
593
- "loss": 0.2975,
594
  "step": 820
595
  },
596
  {
597
  "epoch": 1.4957187922487607,
598
- "grad_norm": 21.80506134033203,
599
  "learning_rate": 3.5018050541516247e-05,
600
- "loss": 0.3251,
601
  "step": 830
602
  },
603
  {
604
  "epoch": 1.5137449301487156,
605
- "grad_norm": 12.826874732971191,
606
  "learning_rate": 3.483754512635379e-05,
607
- "loss": 0.2943,
608
  "step": 840
609
  },
610
  {
611
  "epoch": 1.5317710680486707,
612
- "grad_norm": 23.97177505493164,
613
  "learning_rate": 3.4657039711191336e-05,
614
- "loss": 0.3221,
615
  "step": 850
616
  },
617
  {
618
  "epoch": 1.5497972059486256,
619
- "grad_norm": 20.326284408569336,
620
  "learning_rate": 3.447653429602888e-05,
621
- "loss": 0.3276,
622
  "step": 860
623
  },
624
  {
625
  "epoch": 1.5678233438485805,
626
- "grad_norm": 19.27255630493164,
627
  "learning_rate": 3.4296028880866426e-05,
628
- "loss": 0.3338,
629
  "step": 870
630
  },
631
  {
632
  "epoch": 1.5858494817485354,
633
- "grad_norm": 17.299917221069336,
634
  "learning_rate": 3.411552346570397e-05,
635
- "loss": 0.3177,
636
  "step": 880
637
  },
638
  {
639
  "epoch": 1.6038756196484902,
640
- "grad_norm": 15.277181625366211,
641
  "learning_rate": 3.3935018050541516e-05,
642
- "loss": 0.2773,
643
  "step": 890
644
  },
645
  {
646
  "epoch": 1.6219017575484451,
647
- "grad_norm": 27.945833206176758,
648
  "learning_rate": 3.375451263537907e-05,
649
- "loss": 0.3417,
650
  "step": 900
651
  },
652
  {
653
  "epoch": 1.6399278954484002,
654
- "grad_norm": 25.78679847717285,
655
  "learning_rate": 3.3574007220216606e-05,
656
- "loss": 0.2959,
657
  "step": 910
658
  },
659
  {
660
  "epoch": 1.6579540333483551,
661
- "grad_norm": 17.608028411865234,
662
  "learning_rate": 3.339350180505416e-05,
663
- "loss": 0.273,
664
  "step": 920
665
  },
666
  {
667
  "epoch": 1.67598017124831,
668
- "grad_norm": 17.937299728393555,
669
  "learning_rate": 3.3212996389891696e-05,
670
- "loss": 0.3121,
671
  "step": 930
672
  },
673
  {
674
  "epoch": 1.694006309148265,
675
- "grad_norm": 22.207386016845703,
676
  "learning_rate": 3.303249097472924e-05,
677
- "loss": 0.3389,
678
  "step": 940
679
  },
680
  {
681
  "epoch": 1.71203244704822,
682
- "grad_norm": 40.351959228515625,
683
  "learning_rate": 3.2851985559566786e-05,
684
- "loss": 0.3138,
685
  "step": 950
686
  },
687
  {
688
  "epoch": 1.7300585849481749,
689
- "grad_norm": 23.845033645629883,
690
  "learning_rate": 3.267148014440433e-05,
691
- "loss": 0.2865,
692
  "step": 960
693
  },
694
  {
695
  "epoch": 1.7480847228481298,
696
- "grad_norm": 18.095985412597656,
697
  "learning_rate": 3.249097472924188e-05,
698
- "loss": 0.294,
699
  "step": 970
700
  },
701
  {
702
  "epoch": 1.7661108607480847,
703
- "grad_norm": 25.775617599487305,
704
  "learning_rate": 3.231046931407942e-05,
705
- "loss": 0.2785,
706
  "step": 980
707
  },
708
  {
709
  "epoch": 1.7841369986480395,
710
- "grad_norm": 20.0789737701416,
711
  "learning_rate": 3.212996389891697e-05,
712
- "loss": 0.3533,
713
  "step": 990
714
  },
715
  {
716
  "epoch": 1.8021631365479944,
717
- "grad_norm": 26.862163543701172,
718
  "learning_rate": 3.194945848375451e-05,
719
- "loss": 0.2957,
720
  "step": 1000
721
  },
722
  {
723
  "epoch": 1.8201892744479495,
724
- "grad_norm": 24.20423698425293,
725
  "learning_rate": 3.176895306859206e-05,
726
- "loss": 0.3195,
727
  "step": 1010
728
  },
729
  {
730
  "epoch": 1.8382154123479044,
731
- "grad_norm": 25.36762809753418,
732
  "learning_rate": 3.15884476534296e-05,
733
- "loss": 0.2823,
734
  "step": 1020
735
  },
736
  {
737
  "epoch": 1.8562415502478595,
738
- "grad_norm": 37.54848098754883,
739
  "learning_rate": 3.140794223826715e-05,
740
- "loss": 0.3557,
741
  "step": 1030
742
  },
743
  {
744
  "epoch": 1.8742676881478144,
745
- "grad_norm": 24.26515769958496,
746
  "learning_rate": 3.12274368231047e-05,
747
- "loss": 0.3194,
748
  "step": 1040
749
  },
750
  {
751
  "epoch": 1.8922938260477693,
752
- "grad_norm": 22.1879940032959,
753
  "learning_rate": 3.104693140794224e-05,
754
- "loss": 0.2912,
755
  "step": 1050
756
  },
757
  {
758
  "epoch": 1.9103199639477242,
759
- "grad_norm": 21.703449249267578,
760
  "learning_rate": 3.086642599277979e-05,
761
- "loss": 0.2635,
762
  "step": 1060
763
  },
764
  {
765
  "epoch": 1.928346101847679,
766
- "grad_norm": 33.39324951171875,
767
  "learning_rate": 3.0685920577617325e-05,
768
- "loss": 0.2943,
769
  "step": 1070
770
  },
771
  {
772
  "epoch": 1.946372239747634,
773
- "grad_norm": 21.398008346557617,
774
  "learning_rate": 3.0505415162454877e-05,
775
- "loss": 0.353,
776
  "step": 1080
777
  },
778
  {
779
  "epoch": 1.9643983776475888,
780
- "grad_norm": 17.463472366333008,
781
  "learning_rate": 3.032490974729242e-05,
782
- "loss": 0.2934,
783
  "step": 1090
784
  },
785
  {
786
  "epoch": 1.982424515547544,
787
- "grad_norm": 32.96004104614258,
788
  "learning_rate": 3.0144404332129967e-05,
789
- "loss": 0.3206,
790
  "step": 1100
791
  },
792
  {
793
  "epoch": 2.0,
794
- "grad_norm": 22.864667892456055,
795
  "learning_rate": 2.996389891696751e-05,
796
- "loss": 0.2765,
797
  "step": 1110
798
  },
799
  {
800
  "epoch": 2.0,
801
- "eval_f1": 0.8415584415584415,
802
- "eval_loss": 0.3972894847393036,
803
- "eval_precision": 0.7748344370860927,
804
- "eval_recall": 0.9208570179274158,
805
- "eval_runtime": 67.5759,
806
- "eval_samples_per_second": 69.152,
807
- "eval_steps_per_second": 8.657,
 
 
 
 
808
  "step": 1110
809
  }
810
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.018026137899954935,
13
+ "grad_norm": 37.54188919067383,
14
  "learning_rate": 4.981949458483755e-05,
15
+ "loss": 0.8911,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.03605227579990987,
20
+ "grad_norm": 15.440231323242188,
21
  "learning_rate": 4.963898916967509e-05,
22
+ "loss": 0.6165,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.054078413699864804,
27
+ "grad_norm": 226.8269500732422,
28
  "learning_rate": 4.945848375451264e-05,
29
+ "loss": 0.5151,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.07210455159981974,
34
+ "grad_norm": 21.70491600036621,
35
  "learning_rate": 4.927797833935018e-05,
36
+ "loss": 0.5669,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.09013068949977468,
41
+ "grad_norm": 79.27781677246094,
42
  "learning_rate": 4.909747292418773e-05,
43
+ "loss": 0.5119,
44
  "step": 50
45
  },
46
  {
47
  "epoch": 0.10815682739972961,
48
+ "grad_norm": 15.314950942993164,
49
  "learning_rate": 4.891696750902527e-05,
50
+ "loss": 0.474,
51
  "step": 60
52
  },
53
  {
54
  "epoch": 0.12618296529968454,
55
+ "grad_norm": 25.575763702392578,
56
  "learning_rate": 4.873646209386282e-05,
57
+ "loss": 0.478,
58
  "step": 70
59
  },
60
  {
61
  "epoch": 0.14420910319963948,
62
+ "grad_norm": 27.453636169433594,
63
  "learning_rate": 4.855595667870036e-05,
64
+ "loss": 0.4285,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.16223524109959442,
69
+ "grad_norm": 34.88062286376953,
70
  "learning_rate": 4.837545126353791e-05,
71
+ "loss": 0.4317,
72
  "step": 90
73
  },
74
  {
75
  "epoch": 0.18026137899954936,
76
+ "grad_norm": 46.823577880859375,
77
  "learning_rate": 4.819494584837546e-05,
78
+ "loss": 0.4899,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 0.19828751689950427,
83
+ "grad_norm": 36.73914337158203,
84
  "learning_rate": 4.8014440433213e-05,
85
+ "loss": 0.4839,
86
  "step": 110
87
  },
88
  {
89
  "epoch": 0.21631365479945922,
90
+ "grad_norm": 102.61051940917969,
91
  "learning_rate": 4.783393501805055e-05,
92
+ "loss": 0.4795,
93
  "step": 120
94
  },
95
  {
96
  "epoch": 0.23433979269941416,
97
+ "grad_norm": 24.9108829498291,
98
  "learning_rate": 4.765342960288809e-05,
99
+ "loss": 0.4332,
100
  "step": 130
101
  },
102
  {
103
  "epoch": 0.25236593059936907,
104
+ "grad_norm": 14.366719245910645,
105
  "learning_rate": 4.747292418772563e-05,
106
+ "loss": 0.4811,
107
  "step": 140
108
  },
109
  {
110
  "epoch": 0.270392068499324,
111
+ "grad_norm": 19.95521354675293,
112
  "learning_rate": 4.7292418772563177e-05,
113
+ "loss": 0.4544,
114
  "step": 150
115
  },
116
  {
117
  "epoch": 0.28841820639927895,
118
+ "grad_norm": 20.914020538330078,
119
  "learning_rate": 4.711191335740072e-05,
120
+ "loss": 0.4646,
121
  "step": 160
122
  },
123
  {
124
  "epoch": 0.3064443442992339,
125
+ "grad_norm": 154.6277313232422,
126
  "learning_rate": 4.693140794223827e-05,
127
+ "loss": 0.47,
128
  "step": 170
129
  },
130
  {
131
  "epoch": 0.32447048219918884,
132
+ "grad_norm": 30.73076820373535,
133
  "learning_rate": 4.675090252707581e-05,
134
+ "loss": 0.4341,
135
  "step": 180
136
  },
137
  {
138
  "epoch": 0.3424966200991438,
139
+ "grad_norm": 14.621489524841309,
140
  "learning_rate": 4.657039711191336e-05,
141
+ "loss": 0.5163,
142
  "step": 190
143
  },
144
  {
145
  "epoch": 0.3605227579990987,
146
+ "grad_norm": 25.546030044555664,
147
  "learning_rate": 4.63898916967509e-05,
148
+ "loss": 0.4601,
149
  "step": 200
150
  },
151
  {
152
  "epoch": 0.3785488958990536,
153
+ "grad_norm": 18.222177505493164,
154
  "learning_rate": 4.620938628158845e-05,
155
+ "loss": 0.4398,
156
  "step": 210
157
  },
158
  {
159
  "epoch": 0.39657503379900855,
160
+ "grad_norm": 20.1109676361084,
161
  "learning_rate": 4.602888086642599e-05,
162
+ "loss": 0.432,
163
  "step": 220
164
  },
165
  {
166
  "epoch": 0.4146011716989635,
167
+ "grad_norm": 13.729535102844238,
168
  "learning_rate": 4.584837545126354e-05,
169
+ "loss": 0.4327,
170
  "step": 230
171
  },
172
  {
173
  "epoch": 0.43262730959891843,
174
+ "grad_norm": 32.50856018066406,
175
  "learning_rate": 4.566787003610109e-05,
176
+ "loss": 0.4208,
177
  "step": 240
178
  },
179
  {
180
  "epoch": 0.45065344749887337,
181
+ "grad_norm": 15.05933666229248,
182
  "learning_rate": 4.548736462093863e-05,
183
+ "loss": 0.4451,
184
  "step": 250
185
  },
186
  {
187
  "epoch": 0.4686795853988283,
188
+ "grad_norm": 31.787078857421875,
189
  "learning_rate": 4.530685920577618e-05,
190
+ "loss": 0.4376,
191
  "step": 260
192
  },
193
  {
194
  "epoch": 0.48670572329878325,
195
+ "grad_norm": 25.446210861206055,
196
  "learning_rate": 4.5126353790613716e-05,
197
+ "loss": 0.4131,
198
  "step": 270
199
  },
200
  {
201
  "epoch": 0.5047318611987381,
202
+ "grad_norm": 31.328969955444336,
203
  "learning_rate": 4.494584837545127e-05,
204
+ "loss": 0.4194,
205
  "step": 280
206
  },
207
  {
208
  "epoch": 0.5227579990986931,
209
+ "grad_norm": 255.53672790527344,
210
  "learning_rate": 4.4765342960288806e-05,
211
+ "loss": 0.4984,
212
  "step": 290
213
  },
214
  {
215
  "epoch": 0.540784136998648,
216
+ "grad_norm": 14.31953239440918,
217
  "learning_rate": 4.458483754512636e-05,
218
+ "loss": 0.4417,
219
  "step": 300
220
  },
221
  {
222
  "epoch": 0.558810274898603,
223
+ "grad_norm": 26.942129135131836,
224
  "learning_rate": 4.44043321299639e-05,
225
+ "loss": 0.4527,
226
  "step": 310
227
  },
228
  {
229
  "epoch": 0.5768364127985579,
230
+ "grad_norm": 36.217586517333984,
231
  "learning_rate": 4.422382671480145e-05,
232
+ "loss": 0.4485,
233
  "step": 320
234
  },
235
  {
236
  "epoch": 0.5948625506985128,
237
+ "grad_norm": 38.54646682739258,
238
  "learning_rate": 4.404332129963899e-05,
239
+ "loss": 0.4579,
240
  "step": 330
241
  },
242
  {
243
  "epoch": 0.6128886885984678,
244
+ "grad_norm": 21.504106521606445,
245
  "learning_rate": 4.386281588447654e-05,
246
+ "loss": 0.4117,
247
  "step": 340
248
  },
249
  {
250
  "epoch": 0.6309148264984227,
251
+ "grad_norm": 16.283788681030273,
252
  "learning_rate": 4.368231046931408e-05,
253
+ "loss": 0.438,
254
  "step": 350
255
  },
256
  {
257
  "epoch": 0.6489409643983777,
258
+ "grad_norm": 33.57164001464844,
259
  "learning_rate": 4.350180505415163e-05,
260
+ "loss": 0.4043,
261
  "step": 360
262
  },
263
  {
264
  "epoch": 0.6669671022983326,
265
+ "grad_norm": 12.821700096130371,
266
  "learning_rate": 4.332129963898917e-05,
267
+ "loss": 0.4119,
268
  "step": 370
269
  },
270
  {
271
  "epoch": 0.6849932401982876,
272
+ "grad_norm": 23.477996826171875,
273
  "learning_rate": 4.314079422382672e-05,
274
+ "loss": 0.3783,
275
  "step": 380
276
  },
277
  {
278
  "epoch": 0.7030193780982424,
279
+ "grad_norm": 18.2862548828125,
280
  "learning_rate": 4.296028880866426e-05,
281
+ "loss": 0.4023,
282
  "step": 390
283
  },
284
  {
285
  "epoch": 0.7210455159981974,
286
+ "grad_norm": 19.799530029296875,
287
  "learning_rate": 4.277978339350181e-05,
288
+ "loss": 0.419,
289
  "step": 400
290
  },
291
  {
292
  "epoch": 0.7390716538981523,
293
+ "grad_norm": 17.081830978393555,
294
  "learning_rate": 4.259927797833935e-05,
295
+ "loss": 0.467,
296
  "step": 410
297
  },
298
  {
299
  "epoch": 0.7570977917981072,
300
+ "grad_norm": 24.008344650268555,
301
  "learning_rate": 4.24187725631769e-05,
302
+ "loss": 0.4302,
303
  "step": 420
304
  },
305
  {
306
  "epoch": 0.7751239296980622,
307
+ "grad_norm": 40.07936477661133,
308
  "learning_rate": 4.223826714801444e-05,
309
+ "loss": 0.3622,
310
  "step": 430
311
  },
312
  {
313
  "epoch": 0.7931500675980171,
314
+ "grad_norm": 19.614171981811523,
315
  "learning_rate": 4.205776173285199e-05,
316
+ "loss": 0.4645,
317
  "step": 440
318
  },
319
  {
320
  "epoch": 0.8111762054979721,
321
+ "grad_norm": 32.0594596862793,
322
  "learning_rate": 4.187725631768953e-05,
323
+ "loss": 0.3755,
324
  "step": 450
325
  },
326
  {
327
  "epoch": 0.829202343397927,
328
+ "grad_norm": 15.177023887634277,
329
  "learning_rate": 4.169675090252708e-05,
330
+ "loss": 0.4275,
331
  "step": 460
332
  },
333
  {
334
  "epoch": 0.847228481297882,
335
+ "grad_norm": 11.258400917053223,
336
  "learning_rate": 4.151624548736462e-05,
337
+ "loss": 0.4137,
338
  "step": 470
339
  },
340
  {
341
  "epoch": 0.8652546191978369,
342
+ "grad_norm": 13.211421012878418,
343
  "learning_rate": 4.1335740072202167e-05,
344
+ "loss": 0.4337,
345
  "step": 480
346
  },
347
  {
348
  "epoch": 0.8832807570977917,
349
+ "grad_norm": 14.50296401977539,
350
  "learning_rate": 4.115523465703972e-05,
351
+ "loss": 0.4318,
352
  "step": 490
353
  },
354
  {
355
  "epoch": 0.9013068949977467,
356
+ "grad_norm": 21.239362716674805,
357
  "learning_rate": 4.0974729241877256e-05,
358
+ "loss": 0.3939,
359
  "step": 500
360
  },
361
  {
362
  "epoch": 0.9193330328977016,
363
+ "grad_norm": 18.6204891204834,
364
  "learning_rate": 4.079422382671481e-05,
365
+ "loss": 0.3838,
366
  "step": 510
367
  },
368
  {
369
  "epoch": 0.9373591707976566,
370
+ "grad_norm": 21.722551345825195,
371
  "learning_rate": 4.0613718411552346e-05,
372
+ "loss": 0.4236,
373
  "step": 520
374
  },
375
  {
376
  "epoch": 0.9553853086976115,
377
+ "grad_norm": 23.824357986450195,
378
  "learning_rate": 4.043321299638989e-05,
379
+ "loss": 0.4062,
380
  "step": 530
381
  },
382
  {
383
  "epoch": 0.9734114465975665,
384
+ "grad_norm": 18.93727684020996,
385
  "learning_rate": 4.0252707581227436e-05,
386
+ "loss": 0.4412,
387
  "step": 540
388
  },
389
  {
390
  "epoch": 0.9914375844975214,
391
+ "grad_norm": 16.050817489624023,
392
  "learning_rate": 4.007220216606498e-05,
393
+ "loss": 0.4142,
394
  "step": 550
395
  },
396
  {
397
  "epoch": 1.0,
398
+ "eval_f1": 0.8390086641144469,
399
+ "eval_fn": 205,
400
+ "eval_fp": 594,
401
+ "eval_loss": 0.3744712173938751,
402
+ "eval_precision": 0.7780269058295964,
403
+ "eval_recall": 0.910362920857018,
404
+ "eval_runtime": 73.1391,
405
+ "eval_samples_per_second": 63.892,
406
+ "eval_steps_per_second": 7.998,
407
+ "eval_tn": 1792,
408
+ "eval_tp": 2082,
409
  "step": 555
410
  },
411
  {
412
  "epoch": 1.0090130689499774,
413
+ "grad_norm": 26.528642654418945,
414
  "learning_rate": 3.989169675090253e-05,
415
+ "loss": 0.3474,
416
  "step": 560
417
  },
418
  {
419
  "epoch": 1.0270392068499323,
420
+ "grad_norm": 18.92278289794922,
421
  "learning_rate": 3.971119133574007e-05,
422
+ "loss": 0.2881,
423
  "step": 570
424
  },
425
  {
426
  "epoch": 1.0450653447498874,
427
+ "grad_norm": 15.88642406463623,
428
  "learning_rate": 3.953068592057762e-05,
429
+ "loss": 0.3399,
430
  "step": 580
431
  },
432
  {
433
  "epoch": 1.0630914826498423,
434
+ "grad_norm": 27.55136489868164,
435
  "learning_rate": 3.935018050541516e-05,
436
+ "loss": 0.3125,
437
  "step": 590
438
  },
439
  {
440
  "epoch": 1.0811176205497972,
441
+ "grad_norm": 18.58675765991211,
442
  "learning_rate": 3.916967509025271e-05,
443
+ "loss": 0.3626,
444
  "step": 600
445
  },
446
  {
447
  "epoch": 1.099143758449752,
448
+ "grad_norm": 22.22069549560547,
449
  "learning_rate": 3.898916967509025e-05,
450
+ "loss": 0.2978,
451
  "step": 610
452
  },
453
  {
454
  "epoch": 1.117169896349707,
455
+ "grad_norm": 29.50828742980957,
456
  "learning_rate": 3.88086642599278e-05,
457
+ "loss": 0.3334,
458
  "step": 620
459
  },
460
  {
461
  "epoch": 1.135196034249662,
462
+ "grad_norm": 18.485353469848633,
463
  "learning_rate": 3.862815884476535e-05,
464
+ "loss": 0.3025,
465
  "step": 630
466
  },
467
  {
468
  "epoch": 1.153222172149617,
469
+ "grad_norm": 16.8847599029541,
470
  "learning_rate": 3.844765342960289e-05,
471
+ "loss": 0.329,
472
  "step": 640
473
  },
474
  {
475
  "epoch": 1.1712483100495719,
476
+ "grad_norm": 25.133949279785156,
477
  "learning_rate": 3.826714801444044e-05,
478
+ "loss": 0.2712,
479
  "step": 650
480
  },
481
  {
482
  "epoch": 1.1892744479495267,
483
+ "grad_norm": 15.687613487243652,
484
  "learning_rate": 3.8086642599277976e-05,
485
+ "loss": 0.345,
486
  "step": 660
487
  },
488
  {
489
  "epoch": 1.2073005858494819,
490
+ "grad_norm": 18.612354278564453,
491
  "learning_rate": 3.790613718411553e-05,
492
+ "loss": 0.3101,
493
  "step": 670
494
  },
495
  {
496
  "epoch": 1.2253267237494367,
497
+ "grad_norm": 23.197965621948242,
498
  "learning_rate": 3.7725631768953066e-05,
499
+ "loss": 0.3023,
500
  "step": 680
501
  },
502
  {
503
  "epoch": 1.2433528616493916,
504
+ "grad_norm": 20.018022537231445,
505
  "learning_rate": 3.754512635379062e-05,
506
+ "loss": 0.3302,
507
  "step": 690
508
  },
509
  {
510
  "epoch": 1.2613789995493465,
511
+ "grad_norm": 55.68796157836914,
512
  "learning_rate": 3.7364620938628155e-05,
513
+ "loss": 0.3265,
514
  "step": 700
515
  },
516
  {
517
  "epoch": 1.2794051374493014,
518
+ "grad_norm": 18.61658477783203,
519
  "learning_rate": 3.718411552346571e-05,
520
+ "loss": 0.2827,
521
  "step": 710
522
  },
523
  {
524
  "epoch": 1.2974312753492563,
525
+ "grad_norm": 20.81103515625,
526
  "learning_rate": 3.700361010830325e-05,
527
+ "loss": 0.3427,
528
  "step": 720
529
  },
530
  {
531
  "epoch": 1.3154574132492114,
532
+ "grad_norm": 20.623382568359375,
533
  "learning_rate": 3.68231046931408e-05,
534
+ "loss": 0.3241,
535
  "step": 730
536
  },
537
  {
538
  "epoch": 1.3334835511491663,
539
+ "grad_norm": 18.186420440673828,
540
  "learning_rate": 3.664259927797834e-05,
541
+ "loss": 0.3692,
542
  "step": 740
543
  },
544
  {
545
  "epoch": 1.3515096890491212,
546
+ "grad_norm": 19.591402053833008,
547
  "learning_rate": 3.646209386281589e-05,
548
+ "loss": 0.3237,
549
  "step": 750
550
  },
551
  {
552
  "epoch": 1.3695358269490763,
553
+ "grad_norm": 29.652109146118164,
554
  "learning_rate": 3.628158844765343e-05,
555
+ "loss": 0.3223,
556
  "step": 760
557
  },
558
  {
559
  "epoch": 1.3875619648490312,
560
+ "grad_norm": 13.557708740234375,
561
  "learning_rate": 3.610108303249098e-05,
562
+ "loss": 0.3534,
563
  "step": 770
564
  },
565
  {
566
  "epoch": 1.405588102748986,
567
+ "grad_norm": 32.48282241821289,
568
  "learning_rate": 3.592057761732852e-05,
569
+ "loss": 0.3476,
570
  "step": 780
571
  },
572
  {
573
  "epoch": 1.423614240648941,
574
+ "grad_norm": 18.360185623168945,
575
  "learning_rate": 3.574007220216607e-05,
576
+ "loss": 0.3506,
577
  "step": 790
578
  },
579
  {
580
  "epoch": 1.4416403785488958,
581
+ "grad_norm": 18.72186279296875,
582
  "learning_rate": 3.555956678700361e-05,
583
+ "loss": 0.3169,
584
  "step": 800
585
  },
586
  {
587
  "epoch": 1.4596665164488507,
588
+ "grad_norm": 20.742034912109375,
589
  "learning_rate": 3.537906137184116e-05,
590
+ "loss": 0.2974,
591
  "step": 810
592
  },
593
  {
594
  "epoch": 1.4776926543488058,
595
+ "grad_norm": 27.217145919799805,
596
  "learning_rate": 3.51985559566787e-05,
597
+ "loss": 0.3038,
598
  "step": 820
599
  },
600
  {
601
  "epoch": 1.4957187922487607,
602
+ "grad_norm": 27.99778175354004,
603
  "learning_rate": 3.5018050541516247e-05,
604
+ "loss": 0.3197,
605
  "step": 830
606
  },
607
  {
608
  "epoch": 1.5137449301487156,
609
+ "grad_norm": 18.663578033447266,
610
  "learning_rate": 3.483754512635379e-05,
611
+ "loss": 0.2899,
612
  "step": 840
613
  },
614
  {
615
  "epoch": 1.5317710680486707,
616
+ "grad_norm": 19.825780868530273,
617
  "learning_rate": 3.4657039711191336e-05,
618
+ "loss": 0.3086,
619
  "step": 850
620
  },
621
  {
622
  "epoch": 1.5497972059486256,
623
+ "grad_norm": 25.401752471923828,
624
  "learning_rate": 3.447653429602888e-05,
625
+ "loss": 0.3261,
626
  "step": 860
627
  },
628
  {
629
  "epoch": 1.5678233438485805,
630
+ "grad_norm": 25.98396110534668,
631
  "learning_rate": 3.4296028880866426e-05,
632
+ "loss": 0.3437,
633
  "step": 870
634
  },
635
  {
636
  "epoch": 1.5858494817485354,
637
+ "grad_norm": 21.777820587158203,
638
  "learning_rate": 3.411552346570397e-05,
639
+ "loss": 0.3243,
640
  "step": 880
641
  },
642
  {
643
  "epoch": 1.6038756196484902,
644
+ "grad_norm": 12.681973457336426,
645
  "learning_rate": 3.3935018050541516e-05,
646
+ "loss": 0.301,
647
  "step": 890
648
  },
649
  {
650
  "epoch": 1.6219017575484451,
651
+ "grad_norm": 15.141433715820312,
652
  "learning_rate": 3.375451263537907e-05,
653
+ "loss": 0.3557,
654
  "step": 900
655
  },
656
  {
657
  "epoch": 1.6399278954484002,
658
+ "grad_norm": 24.14081382751465,
659
  "learning_rate": 3.3574007220216606e-05,
660
+ "loss": 0.315,
661
  "step": 910
662
  },
663
  {
664
  "epoch": 1.6579540333483551,
665
+ "grad_norm": 17.701383590698242,
666
  "learning_rate": 3.339350180505416e-05,
667
+ "loss": 0.2934,
668
  "step": 920
669
  },
670
  {
671
  "epoch": 1.67598017124831,
672
+ "grad_norm": 15.166337013244629,
673
  "learning_rate": 3.3212996389891696e-05,
674
+ "loss": 0.3339,
675
  "step": 930
676
  },
677
  {
678
  "epoch": 1.694006309148265,
679
+ "grad_norm": 19.966903686523438,
680
  "learning_rate": 3.303249097472924e-05,
681
+ "loss": 0.3499,
682
  "step": 940
683
  },
684
  {
685
  "epoch": 1.71203244704822,
686
+ "grad_norm": 44.75580596923828,
687
  "learning_rate": 3.2851985559566786e-05,
688
+ "loss": 0.3243,
689
  "step": 950
690
  },
691
  {
692
  "epoch": 1.7300585849481749,
693
+ "grad_norm": 20.718111038208008,
694
  "learning_rate": 3.267148014440433e-05,
695
+ "loss": 0.3045,
696
  "step": 960
697
  },
698
  {
699
  "epoch": 1.7480847228481298,
700
+ "grad_norm": 22.213359832763672,
701
  "learning_rate": 3.249097472924188e-05,
702
+ "loss": 0.3172,
703
  "step": 970
704
  },
705
  {
706
  "epoch": 1.7661108607480847,
707
+ "grad_norm": 26.86611557006836,
708
  "learning_rate": 3.231046931407942e-05,
709
+ "loss": 0.3018,
710
  "step": 980
711
  },
712
  {
713
  "epoch": 1.7841369986480395,
714
+ "grad_norm": 17.582984924316406,
715
  "learning_rate": 3.212996389891697e-05,
716
+ "loss": 0.3658,
717
  "step": 990
718
  },
719
  {
720
  "epoch": 1.8021631365479944,
721
+ "grad_norm": 35.41366958618164,
722
  "learning_rate": 3.194945848375451e-05,
723
+ "loss": 0.295,
724
  "step": 1000
725
  },
726
  {
727
  "epoch": 1.8201892744479495,
728
+ "grad_norm": 29.620559692382812,
729
  "learning_rate": 3.176895306859206e-05,
730
+ "loss": 0.3218,
731
  "step": 1010
732
  },
733
  {
734
  "epoch": 1.8382154123479044,
735
+ "grad_norm": 26.717348098754883,
736
  "learning_rate": 3.15884476534296e-05,
737
+ "loss": 0.2965,
738
  "step": 1020
739
  },
740
  {
741
  "epoch": 1.8562415502478595,
742
+ "grad_norm": 32.59479522705078,
743
  "learning_rate": 3.140794223826715e-05,
744
+ "loss": 0.3378,
745
  "step": 1030
746
  },
747
  {
748
  "epoch": 1.8742676881478144,
749
+ "grad_norm": 17.58369255065918,
750
  "learning_rate": 3.12274368231047e-05,
751
+ "loss": 0.323,
752
  "step": 1040
753
  },
754
  {
755
  "epoch": 1.8922938260477693,
756
+ "grad_norm": 30.458721160888672,
757
  "learning_rate": 3.104693140794224e-05,
758
+ "loss": 0.3001,
759
  "step": 1050
760
  },
761
  {
762
  "epoch": 1.9103199639477242,
763
+ "grad_norm": 21.08568000793457,
764
  "learning_rate": 3.086642599277979e-05,
765
+ "loss": 0.2869,
766
  "step": 1060
767
  },
768
  {
769
  "epoch": 1.928346101847679,
770
+ "grad_norm": 27.382286071777344,
771
  "learning_rate": 3.0685920577617325e-05,
772
+ "loss": 0.2954,
773
  "step": 1070
774
  },
775
  {
776
  "epoch": 1.946372239747634,
777
+ "grad_norm": 18.023101806640625,
778
  "learning_rate": 3.0505415162454877e-05,
779
+ "loss": 0.3475,
780
  "step": 1080
781
  },
782
  {
783
  "epoch": 1.9643983776475888,
784
+ "grad_norm": 21.446565628051758,
785
  "learning_rate": 3.032490974729242e-05,
786
+ "loss": 0.3167,
787
  "step": 1090
788
  },
789
  {
790
  "epoch": 1.982424515547544,
791
+ "grad_norm": 27.844911575317383,
792
  "learning_rate": 3.0144404332129967e-05,
793
+ "loss": 0.321,
794
  "step": 1100
795
  },
796
  {
797
  "epoch": 2.0,
798
+ "grad_norm": 23.883480072021484,
799
  "learning_rate": 2.996389891696751e-05,
800
+ "loss": 0.2806,
801
  "step": 1110
802
  },
803
  {
804
  "epoch": 2.0,
805
+ "eval_f1": 0.8423625254582485,
806
+ "eval_fn": 219,
807
+ "eval_fp": 555,
808
+ "eval_loss": 0.38451844453811646,
809
+ "eval_precision": 0.7884102173084254,
810
+ "eval_recall": 0.9042413642326191,
811
+ "eval_runtime": 61.8973,
812
+ "eval_samples_per_second": 75.496,
813
+ "eval_steps_per_second": 9.451,
814
+ "eval_tn": 1831,
815
+ "eval_tp": 2068,
816
  "step": 1110
817
  }
818
  ],
checkpoint-1110/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd360ff6566a7421740b08164da56480ee843ecd9962d79899c9150e4be0d68e
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:810629f6569bf22bd4122bdaa88d9cc0d6cea1b331667027a3cd4891b919dd14
3
  size 5432