anoaky commited on
Commit
f97c2f4
·
verified ·
1 Parent(s): 2fdd0f0

Training in progress, epoch 1, checkpoint

Browse files
checkpoint-555/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f42bfef94167af57aba166502e2002c8cd03dfb028532c18f5fa3a2747f2f97
3
  size 1629432864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:431d86bc92271d1ef5e64d393077b7e6cc36c837e77cc0dc2045c95b2a5b43a1
3
  size 1629432864
checkpoint-555/trainer_state.json CHANGED
@@ -2,7 +2,7 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
- "eval_steps": 693,
6
  "global_step": 555,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
@@ -10,405 +10,38 @@
10
  "log_history": [
11
  {
12
  "epoch": 0,
13
- "eval_f1": 0.14772727272727273,
14
- "eval_loss": 0.8850772380828857,
15
- "eval_precision": 0.08080808080808081,
16
- "eval_recall": 0.859504132231405,
17
- "eval_runtime": 99.3343,
18
- "eval_samples_per_second": 47.043,
19
- "eval_steps_per_second": 5.889,
20
  "step": 0
21
  },
22
  {
23
- "epoch": 0.018026137899954935,
24
- "grad_norm": 6.000438690185547,
25
- "learning_rate": 4.981949458483755e-05,
26
- "loss": 0.3931,
27
- "step": 10
28
  },
29
  {
30
- "epoch": 0.03605227579990987,
31
- "grad_norm": 47.497467041015625,
32
- "learning_rate": 4.963898916967509e-05,
33
- "loss": 0.2607,
34
- "step": 20
35
- },
36
- {
37
- "epoch": 0.054078413699864804,
38
- "grad_norm": 13.464141845703125,
39
- "learning_rate": 4.945848375451264e-05,
40
- "loss": 0.1813,
41
- "step": 30
42
- },
43
- {
44
- "epoch": 0.07210455159981974,
45
- "grad_norm": 15.57222843170166,
46
- "learning_rate": 4.927797833935018e-05,
47
- "loss": 0.2618,
48
- "step": 40
49
- },
50
- {
51
- "epoch": 0.09013068949977468,
52
- "grad_norm": 11.892234802246094,
53
- "learning_rate": 4.909747292418773e-05,
54
- "loss": 0.1559,
55
- "step": 50
56
- },
57
- {
58
- "epoch": 0.10815682739972961,
59
- "grad_norm": 6.340595722198486,
60
- "learning_rate": 4.891696750902527e-05,
61
- "loss": 0.1648,
62
- "step": 60
63
- },
64
- {
65
- "epoch": 0.12618296529968454,
66
- "grad_norm": 10.196683883666992,
67
- "learning_rate": 4.873646209386282e-05,
68
- "loss": 0.1472,
69
- "step": 70
70
- },
71
- {
72
- "epoch": 0.14420910319963948,
73
- "grad_norm": 11.511515617370605,
74
- "learning_rate": 4.855595667870036e-05,
75
- "loss": 0.2108,
76
- "step": 80
77
- },
78
- {
79
- "epoch": 0.16223524109959442,
80
- "grad_norm": 8.681073188781738,
81
- "learning_rate": 4.837545126353791e-05,
82
- "loss": 0.1561,
83
- "step": 90
84
- },
85
- {
86
- "epoch": 0.18026137899954936,
87
- "grad_norm": 41.062801361083984,
88
- "learning_rate": 4.819494584837546e-05,
89
- "loss": 0.1563,
90
- "step": 100
91
- },
92
- {
93
- "epoch": 0.19828751689950427,
94
- "grad_norm": 19.93891716003418,
95
- "learning_rate": 4.8014440433213e-05,
96
- "loss": 0.1881,
97
- "step": 110
98
- },
99
- {
100
- "epoch": 0.21631365479945922,
101
- "grad_norm": 21.90877914428711,
102
- "learning_rate": 4.783393501805055e-05,
103
- "loss": 0.1262,
104
- "step": 120
105
- },
106
- {
107
- "epoch": 0.23433979269941416,
108
- "grad_norm": 14.123740196228027,
109
- "learning_rate": 4.765342960288809e-05,
110
- "loss": 0.1535,
111
- "step": 130
112
- },
113
- {
114
- "epoch": 0.25236593059936907,
115
- "grad_norm": 30.671157836914062,
116
- "learning_rate": 4.747292418772563e-05,
117
- "loss": 0.1458,
118
- "step": 140
119
- },
120
- {
121
- "epoch": 0.270392068499324,
122
- "grad_norm": 7.560745716094971,
123
- "learning_rate": 4.7292418772563177e-05,
124
- "loss": 0.1797,
125
- "step": 150
126
- },
127
- {
128
- "epoch": 0.28841820639927895,
129
- "grad_norm": 12.448927879333496,
130
- "learning_rate": 4.711191335740072e-05,
131
- "loss": 0.1898,
132
- "step": 160
133
- },
134
- {
135
- "epoch": 0.3064443442992339,
136
- "grad_norm": 20.040008544921875,
137
- "learning_rate": 4.693140794223827e-05,
138
- "loss": 0.1698,
139
- "step": 170
140
- },
141
- {
142
- "epoch": 0.32447048219918884,
143
- "grad_norm": 7.592446804046631,
144
- "learning_rate": 4.675090252707581e-05,
145
- "loss": 0.156,
146
- "step": 180
147
- },
148
- {
149
- "epoch": 0.3424966200991438,
150
- "grad_norm": 18.830904006958008,
151
- "learning_rate": 4.657039711191336e-05,
152
- "loss": 0.1536,
153
- "step": 190
154
- },
155
- {
156
- "epoch": 0.3605227579990987,
157
- "grad_norm": 3.684938430786133,
158
- "learning_rate": 4.63898916967509e-05,
159
- "loss": 0.1152,
160
- "step": 200
161
- },
162
- {
163
- "epoch": 0.3785488958990536,
164
- "grad_norm": 13.293309211730957,
165
- "learning_rate": 4.620938628158845e-05,
166
- "loss": 0.2116,
167
- "step": 210
168
- },
169
- {
170
- "epoch": 0.39657503379900855,
171
- "grad_norm": 14.630786895751953,
172
- "learning_rate": 4.602888086642599e-05,
173
- "loss": 0.1549,
174
- "step": 220
175
- },
176
- {
177
- "epoch": 0.4146011716989635,
178
- "grad_norm": 20.37642478942871,
179
- "learning_rate": 4.584837545126354e-05,
180
- "loss": 0.1679,
181
- "step": 230
182
- },
183
- {
184
- "epoch": 0.43262730959891843,
185
- "grad_norm": 13.183340072631836,
186
- "learning_rate": 4.566787003610109e-05,
187
- "loss": 0.1581,
188
- "step": 240
189
- },
190
- {
191
- "epoch": 0.45065344749887337,
192
- "grad_norm": 21.777263641357422,
193
- "learning_rate": 4.548736462093863e-05,
194
- "loss": 0.1618,
195
- "step": 250
196
- },
197
- {
198
- "epoch": 0.4686795853988283,
199
- "grad_norm": 20.683944702148438,
200
- "learning_rate": 4.530685920577618e-05,
201
- "loss": 0.1606,
202
- "step": 260
203
- },
204
- {
205
- "epoch": 0.48670572329878325,
206
- "grad_norm": 30.506752014160156,
207
- "learning_rate": 4.5126353790613716e-05,
208
- "loss": 0.1831,
209
- "step": 270
210
- },
211
- {
212
- "epoch": 0.5047318611987381,
213
- "grad_norm": 22.953815460205078,
214
- "learning_rate": 4.494584837545127e-05,
215
- "loss": 0.1565,
216
- "step": 280
217
- },
218
- {
219
- "epoch": 0.5227579990986931,
220
- "grad_norm": 6.177365303039551,
221
- "learning_rate": 4.4765342960288806e-05,
222
- "loss": 0.1287,
223
- "step": 290
224
- },
225
- {
226
- "epoch": 0.540784136998648,
227
- "grad_norm": 8.373098373413086,
228
- "learning_rate": 4.458483754512636e-05,
229
- "loss": 0.1553,
230
- "step": 300
231
- },
232
- {
233
- "epoch": 0.558810274898603,
234
- "grad_norm": 10.07934284210205,
235
- "learning_rate": 4.44043321299639e-05,
236
- "loss": 0.1868,
237
- "step": 310
238
- },
239
- {
240
- "epoch": 0.5768364127985579,
241
- "grad_norm": 18.686702728271484,
242
- "learning_rate": 4.422382671480145e-05,
243
- "loss": 0.1576,
244
- "step": 320
245
- },
246
- {
247
- "epoch": 0.5948625506985128,
248
- "grad_norm": 10.679413795471191,
249
- "learning_rate": 4.404332129963899e-05,
250
- "loss": 0.1409,
251
- "step": 330
252
- },
253
- {
254
- "epoch": 0.6128886885984678,
255
- "grad_norm": 11.381563186645508,
256
- "learning_rate": 4.386281588447654e-05,
257
- "loss": 0.2043,
258
- "step": 340
259
- },
260
- {
261
- "epoch": 0.6309148264984227,
262
- "grad_norm": 40.68292236328125,
263
- "learning_rate": 4.368231046931408e-05,
264
- "loss": 0.1288,
265
- "step": 350
266
- },
267
- {
268
- "epoch": 0.6489409643983777,
269
- "grad_norm": 17.46027946472168,
270
- "learning_rate": 4.350180505415163e-05,
271
- "loss": 0.1537,
272
- "step": 360
273
- },
274
- {
275
- "epoch": 0.6669671022983326,
276
- "grad_norm": 13.656320571899414,
277
- "learning_rate": 4.332129963898917e-05,
278
- "loss": 0.1201,
279
- "step": 370
280
- },
281
- {
282
- "epoch": 0.6849932401982876,
283
- "grad_norm": 20.412939071655273,
284
- "learning_rate": 4.314079422382672e-05,
285
- "loss": 0.1321,
286
- "step": 380
287
- },
288
- {
289
- "epoch": 0.7030193780982424,
290
- "grad_norm": 9.957433700561523,
291
- "learning_rate": 4.296028880866426e-05,
292
- "loss": 0.141,
293
- "step": 390
294
- },
295
- {
296
- "epoch": 0.7210455159981974,
297
- "grad_norm": 9.682558059692383,
298
- "learning_rate": 4.277978339350181e-05,
299
- "loss": 0.1607,
300
- "step": 400
301
- },
302
- {
303
- "epoch": 0.7390716538981523,
304
- "grad_norm": 9.733131408691406,
305
- "learning_rate": 4.259927797833935e-05,
306
- "loss": 0.1037,
307
- "step": 410
308
- },
309
- {
310
- "epoch": 0.7570977917981072,
311
- "grad_norm": 15.76230525970459,
312
- "learning_rate": 4.24187725631769e-05,
313
- "loss": 0.2101,
314
- "step": 420
315
- },
316
- {
317
- "epoch": 0.7751239296980622,
318
- "grad_norm": 22.166545867919922,
319
- "learning_rate": 4.223826714801444e-05,
320
- "loss": 0.1459,
321
- "step": 430
322
- },
323
- {
324
- "epoch": 0.7931500675980171,
325
- "grad_norm": 17.87278175354004,
326
- "learning_rate": 4.205776173285199e-05,
327
- "loss": 0.147,
328
- "step": 440
329
- },
330
- {
331
- "epoch": 0.8111762054979721,
332
- "grad_norm": 5.201169490814209,
333
- "learning_rate": 4.187725631768953e-05,
334
- "loss": 0.1247,
335
- "step": 450
336
- },
337
- {
338
- "epoch": 0.829202343397927,
339
- "grad_norm": 8.68251895904541,
340
- "learning_rate": 4.169675090252708e-05,
341
- "loss": 0.1172,
342
- "step": 460
343
- },
344
- {
345
- "epoch": 0.847228481297882,
346
- "grad_norm": 14.086528778076172,
347
- "learning_rate": 4.151624548736462e-05,
348
- "loss": 0.111,
349
- "step": 470
350
- },
351
- {
352
- "epoch": 0.8652546191978369,
353
- "grad_norm": 5.421584606170654,
354
- "learning_rate": 4.1335740072202167e-05,
355
- "loss": 0.1264,
356
- "step": 480
357
- },
358
- {
359
- "epoch": 0.8832807570977917,
360
- "grad_norm": 7.700821399688721,
361
- "learning_rate": 4.115523465703972e-05,
362
- "loss": 0.1278,
363
- "step": 490
364
- },
365
- {
366
- "epoch": 0.9013068949977467,
367
- "grad_norm": 9.820999145507812,
368
- "learning_rate": 4.0974729241877256e-05,
369
- "loss": 0.1294,
370
- "step": 500
371
- },
372
- {
373
- "epoch": 0.9193330328977016,
374
- "grad_norm": 15.016349792480469,
375
- "learning_rate": 4.079422382671481e-05,
376
- "loss": 0.1607,
377
- "step": 510
378
- },
379
- {
380
- "epoch": 0.9373591707976566,
381
- "grad_norm": 10.43041706085205,
382
- "learning_rate": 4.0613718411552346e-05,
383
- "loss": 0.1091,
384
- "step": 520
385
- },
386
- {
387
- "epoch": 0.9553853086976115,
388
- "grad_norm": 26.99540901184082,
389
- "learning_rate": 4.043321299638989e-05,
390
- "loss": 0.1442,
391
- "step": 530
392
- },
393
- {
394
- "epoch": 0.9734114465975665,
395
- "grad_norm": 8.318906784057617,
396
- "learning_rate": 4.0252707581227436e-05,
397
- "loss": 0.1233,
398
- "step": 540
399
- },
400
- {
401
- "epoch": 0.9914375844975214,
402
- "grad_norm": 8.85923957824707,
403
- "learning_rate": 4.007220216606498e-05,
404
- "loss": 0.1466,
405
- "step": 550
406
  }
407
  ],
408
- "logging_steps": 10,
409
- "max_steps": 2770,
410
  "num_input_tokens_seen": 0,
411
- "num_train_epochs": 5,
412
  "save_steps": 500,
413
  "stateful_callbacks": {
414
  "TrainerControl": {
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
+ "eval_steps": 500,
6
  "global_step": 555,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 0,
13
+ "eval_f1": 0.13882567137005006,
14
+ "eval_loss": 0.8569005727767944,
15
+ "eval_precision": 0.07566360704539817,
16
+ "eval_recall": 0.8402203856749312,
17
+ "eval_runtime": 50.8375,
18
+ "eval_samples_per_second": 91.92,
19
+ "eval_steps_per_second": 11.507,
20
  "step": 0
21
  },
22
  {
23
+ "epoch": 1.0,
24
+ "grad_norm": 18.535512924194336,
25
+ "learning_rate": 2.4954873646209387e-05,
26
+ "loss": 0.1595,
27
+ "step": 555
28
  },
29
  {
30
+ "epoch": 1.0,
31
+ "eval_f1": 0.7377049180327869,
32
+ "eval_loss": 0.11871597915887833,
33
+ "eval_precision": 0.6415478615071283,
34
+ "eval_recall": 0.8677685950413223,
35
+ "eval_runtime": 52.321,
36
+ "eval_samples_per_second": 89.314,
37
+ "eval_steps_per_second": 11.181,
38
+ "step": 555
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  }
40
  ],
41
+ "logging_steps": 500,
42
+ "max_steps": 1108,
43
  "num_input_tokens_seen": 0,
44
+ "num_train_epochs": 2,
45
  "save_steps": 500,
46
  "stateful_callbacks": {
47
  "TrainerControl": {
checkpoint-555/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1cb6c2831bf6310695ea4ef6539addf4fc4092fd2b8fd2fd7364adece0d315f
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fec65d435d82330639f2254c3c8dd619ceaf78855b73ed7360d46e29c00bb91
3
  size 5368