batterydata committed on
Commit
e2eab22
·
1 Parent(s): 080d91f
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
  "epoch": 15.0,
3
- "eval_accuracy": 0.9779270380613723,
4
- "eval_f1": 0.966844646225661,
5
- "eval_loss": 0.15447764098644257,
6
- "eval_precision": 0.9669064748201439,
7
- "eval_recall": 0.9667828255379008,
8
- "eval_runtime": 6.5133,
9
  "eval_samples": 1451,
10
- "eval_samples_per_second": 222.775,
11
- "eval_steps_per_second": 13.971,
12
- "train_loss": 0.03180336131915158,
13
- "train_runtime": 835.9078,
14
  "train_samples": 13054,
15
- "train_samples_per_second": 234.248,
16
- "train_steps_per_second": 14.643
17
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "eval_accuracy": 0.978401158962202,
4
+ "eval_f1": 0.96785434549785,
5
+ "eval_loss": 0.15796583890914917,
6
+ "eval_precision": 0.9678388746803069,
7
+ "eval_recall": 0.9678698168100003,
8
+ "eval_runtime": 3.7378,
9
  "eval_samples": 1451,
10
+ "eval_samples_per_second": 388.2,
11
+ "eval_steps_per_second": 24.346,
12
+ "train_loss": 0.03262665262993644,
13
+ "train_runtime": 785.1159,
14
  "train_samples": 13054,
15
+ "train_samples_per_second": 249.403,
16
+ "train_steps_per_second": 15.59
17
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/lus/theta-fs0/projects/SolarWindowsADSP/shu/models/revision/bert/batterybert-cased/",
3
  "architectures": [
4
  "BertForTokenClassification"
5
  ],
 
1
  {
2
+ "_name_or_path": "/lus/theta-fs0/projects/SolarWindowsADSP/shu/models/best_models/bert-base-cased/",
3
  "architectures": [
4
  "BertForTokenClassification"
5
  ],
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 15.0,
3
- "eval_accuracy": 0.9779270380613723,
4
- "eval_f1": 0.966844646225661,
5
- "eval_loss": 0.15447764098644257,
6
- "eval_precision": 0.9669064748201439,
7
- "eval_recall": 0.9667828255379008,
8
- "eval_runtime": 6.5133,
9
  "eval_samples": 1451,
10
- "eval_samples_per_second": 222.775,
11
- "eval_steps_per_second": 13.971
12
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "eval_accuracy": 0.978401158962202,
4
+ "eval_f1": 0.96785434549785,
5
+ "eval_loss": 0.15796583890914917,
6
+ "eval_precision": 0.9678388746803069,
7
+ "eval_recall": 0.9678698168100003,
8
+ "eval_runtime": 3.7378,
9
  "eval_samples": 1451,
10
+ "eval_samples_per_second": 388.2,
11
+ "eval_steps_per_second": 24.346
12
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:221cfd68e45249bad38f49fffe8e8bf67e45d77610eab67421b946a4afc4a105
3
  size 431110641
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2501b95940948e0e7914b12893a4db2e51e91522e86608321b56de7825d840c
3
  size 431110641
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "/lus/theta-fs0/projects/SolarWindowsADSP/shu/models/revision/bert/batterybert-cased/", "tokenizer_class": "BertTokenizer"}
 
1
+ {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": null, "name_or_path": "/lus/theta-fs0/projects/SolarWindowsADSP/shu/models/best_models/bert-base-cased/", "tokenizer_class": "BertTokenizer"}
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 15.0,
3
- "train_loss": 0.03180336131915158,
4
- "train_runtime": 835.9078,
5
  "train_samples": 13054,
6
- "train_samples_per_second": 234.248,
7
- "train_steps_per_second": 14.643
8
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "train_loss": 0.03262665262993644,
4
+ "train_runtime": 785.1159,
5
  "train_samples": 13054,
6
+ "train_samples_per_second": 249.403,
7
+ "train_steps_per_second": 15.59
8
  }
trainer_state.json CHANGED
@@ -10,335 +10,335 @@
10
  {
11
  "epoch": 0.61,
12
  "learning_rate": 1.9183006535947716e-05,
13
- "loss": 0.3785,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_accuracy": 0.9737126300539971,
19
- "eval_f1": 0.9626903878151799,
20
- "eval_loss": 0.09840826690196991,
21
- "eval_precision": 0.9635228182546037,
22
- "eval_recall": 0.9618593944819208,
23
- "eval_runtime": 3.9949,
24
- "eval_samples_per_second": 363.209,
25
- "eval_steps_per_second": 22.779,
26
  "step": 816
27
  },
28
  {
29
  "epoch": 1.23,
30
  "learning_rate": 1.8366013071895427e-05,
31
- "loss": 0.0836,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 1.84,
36
  "learning_rate": 1.7549019607843138e-05,
37
- "loss": 0.0641,
38
  "step": 1500
39
  },
40
  {
41
  "epoch": 2.0,
42
- "eval_accuracy": 0.975925194257869,
43
- "eval_f1": 0.9640177271490968,
44
- "eval_loss": 0.09886540472507477,
45
- "eval_precision": 0.9648667691519344,
46
- "eval_recall": 0.9631701780747466,
47
- "eval_runtime": 3.9485,
48
- "eval_samples_per_second": 367.479,
49
- "eval_steps_per_second": 23.047,
50
  "step": 1632
51
  },
52
  {
53
  "epoch": 2.45,
54
  "learning_rate": 1.6732026143790852e-05,
55
- "loss": 0.0458,
56
  "step": 2000
57
  },
58
  {
59
  "epoch": 3.0,
60
- "eval_accuracy": 0.976741735809298,
61
- "eval_f1": 0.9651566272766523,
62
- "eval_loss": 0.09389109164476395,
63
- "eval_precision": 0.9654808369057521,
64
- "eval_recall": 0.9648326353144282,
65
- "eval_runtime": 3.7903,
66
- "eval_samples_per_second": 382.817,
67
- "eval_steps_per_second": 24.008,
68
  "step": 2448
69
  },
70
  {
71
  "epoch": 3.06,
72
  "learning_rate": 1.5915032679738563e-05,
73
- "loss": 0.0394,
74
  "step": 2500
75
  },
76
  {
77
  "epoch": 3.68,
78
  "learning_rate": 1.5098039215686276e-05,
79
- "loss": 0.0279,
80
  "step": 3000
81
  },
82
  {
83
  "epoch": 4.0,
84
- "eval_accuracy": 0.9758725141577769,
85
- "eval_f1": 0.963916701321135,
86
- "eval_loss": 0.09818906337022781,
87
- "eval_precision": 0.9644720417373491,
88
- "eval_recall": 0.9633620000639407,
89
- "eval_runtime": 3.805,
90
- "eval_samples_per_second": 381.341,
91
- "eval_steps_per_second": 23.916,
92
  "step": 3264
93
  },
94
  {
95
  "epoch": 4.29,
96
  "learning_rate": 1.4281045751633989e-05,
97
- "loss": 0.0254,
98
  "step": 3500
99
  },
100
  {
101
  "epoch": 4.9,
102
  "learning_rate": 1.3464052287581701e-05,
103
- "loss": 0.0194,
104
  "step": 4000
105
  },
106
  {
107
  "epoch": 5.0,
108
- "eval_accuracy": 0.9765310154089293,
109
- "eval_f1": 0.9647630619684083,
110
- "eval_loss": 0.10570573806762695,
111
- "eval_precision": 0.9649173302631999,
112
- "eval_recall": 0.9646088429937019,
113
- "eval_runtime": 4.1667,
114
- "eval_samples_per_second": 348.241,
115
- "eval_steps_per_second": 21.84,
116
  "step": 4080
117
  },
118
  {
119
  "epoch": 5.51,
120
  "learning_rate": 1.2647058823529412e-05,
121
- "loss": 0.0141,
122
  "step": 4500
123
  },
124
  {
125
  "epoch": 6.0,
126
- "eval_accuracy": 0.9767680758593441,
127
- "eval_f1": 0.9650685041006539,
128
- "eval_loss": 0.11329083889722824,
129
- "eval_precision": 0.9651765157329241,
130
- "eval_recall": 0.9649605166405576,
131
- "eval_runtime": 3.8277,
132
- "eval_samples_per_second": 379.077,
133
- "eval_steps_per_second": 23.774,
134
  "step": 4896
135
  },
136
  {
137
  "epoch": 6.13,
138
  "learning_rate": 1.1830065359477125e-05,
139
- "loss": 0.0142,
140
  "step": 5000
141
  },
142
  {
143
  "epoch": 6.74,
144
  "learning_rate": 1.1013071895424838e-05,
145
- "loss": 0.0097,
146
  "step": 5500
147
  },
148
  {
149
  "epoch": 7.0,
150
- "eval_accuracy": 0.9780060582115107,
151
- "eval_f1": 0.9669496810092579,
152
- "eval_loss": 0.1174820214509964,
153
- "eval_precision": 0.9672125903652997,
154
- "eval_recall": 0.9666869145433038,
155
- "eval_runtime": 2.7647,
156
- "eval_samples_per_second": 524.836,
157
- "eval_steps_per_second": 32.915,
158
  "step": 5712
159
  },
160
  {
161
  "epoch": 7.35,
162
  "learning_rate": 1.0196078431372549e-05,
163
- "loss": 0.0085,
164
  "step": 6000
165
  },
166
  {
167
  "epoch": 7.97,
168
  "learning_rate": 9.379084967320261e-06,
169
- "loss": 0.0078,
170
  "step": 6500
171
  },
172
  {
173
  "epoch": 8.0,
174
- "eval_accuracy": 0.9770841564598973,
175
- "eval_f1": 0.965837913843794,
176
- "eval_loss": 0.12893585860729218,
177
- "eval_precision": 0.9654368311771283,
178
- "eval_recall": 0.9662393299018511,
179
- "eval_runtime": 3.7734,
180
- "eval_samples_per_second": 384.529,
181
- "eval_steps_per_second": 24.116,
182
  "step": 6528
183
  },
184
  {
185
  "epoch": 8.58,
186
  "learning_rate": 8.562091503267974e-06,
187
- "loss": 0.0061,
188
  "step": 7000
189
  },
190
  {
191
  "epoch": 9.0,
192
- "eval_accuracy": 0.9767680758593441,
193
- "eval_f1": 0.9654114677360088,
194
- "eval_loss": 0.1360652893781662,
195
- "eval_precision": 0.9648719422622469,
196
- "eval_recall": 0.96595159691806,
197
- "eval_runtime": 4.9673,
198
- "eval_samples_per_second": 292.108,
199
- "eval_steps_per_second": 18.32,
200
  "step": 7344
201
  },
202
  {
203
  "epoch": 9.19,
204
  "learning_rate": 7.745098039215687e-06,
205
- "loss": 0.0057,
206
  "step": 7500
207
  },
208
  {
209
  "epoch": 9.8,
210
  "learning_rate": 6.928104575163399e-06,
211
- "loss": 0.0046,
212
  "step": 8000
213
  },
214
  {
215
  "epoch": 10.0,
216
- "eval_accuracy": 0.976741735809298,
217
- "eval_f1": 0.9652162792928163,
218
- "eval_loss": 0.14587946236133575,
219
- "eval_precision": 0.9652162792928163,
220
- "eval_recall": 0.9652162792928163,
221
- "eval_runtime": 2.7677,
222
- "eval_samples_per_second": 524.265,
223
- "eval_steps_per_second": 32.879,
224
  "step": 8160
225
  },
226
  {
227
  "epoch": 10.42,
228
  "learning_rate": 6.111111111111112e-06,
229
- "loss": 0.0042,
230
  "step": 8500
231
  },
232
  {
233
  "epoch": 11.0,
234
- "eval_accuracy": 0.977848017911234,
235
- "eval_f1": 0.9667578068786875,
236
- "eval_loss": 0.14097169041633606,
237
- "eval_precision": 0.9670206640649991,
238
- "eval_recall": 0.9664950925541098,
239
- "eval_runtime": 2.8235,
240
- "eval_samples_per_second": 513.9,
241
- "eval_steps_per_second": 32.229,
242
  "step": 8976
243
  },
244
  {
245
  "epoch": 11.03,
246
  "learning_rate": 5.294117647058824e-06,
247
- "loss": 0.0041,
248
  "step": 9000
249
  },
250
  {
251
  "epoch": 11.64,
252
  "learning_rate": 4.477124183006537e-06,
253
- "loss": 0.0034,
254
  "step": 9500
255
  },
256
  {
257
  "epoch": 12.0,
258
- "eval_accuracy": 0.9781377584617411,
259
- "eval_f1": 0.9671292447400396,
260
- "eval_loss": 0.14326775074005127,
261
- "eval_precision": 0.9672838913940325,
262
- "eval_recall": 0.9669746475270948,
263
- "eval_runtime": 3.9931,
264
- "eval_samples_per_second": 363.381,
265
- "eval_steps_per_second": 22.79,
266
  "step": 9792
267
  },
268
  {
269
  "epoch": 12.25,
270
  "learning_rate": 3.6601307189542484e-06,
271
- "loss": 0.0027,
272
  "step": 10000
273
  },
274
  {
275
  "epoch": 12.87,
276
  "learning_rate": 2.843137254901961e-06,
277
- "loss": 0.0026,
278
  "step": 10500
279
  },
280
  {
281
  "epoch": 13.0,
282
- "eval_accuracy": 0.9774529171605426,
283
- "eval_f1": 0.9662260441474992,
284
- "eval_loss": 0.15390940010547638,
285
- "eval_precision": 0.9661488300728807,
286
- "eval_recall": 0.9663032705649157,
287
- "eval_runtime": 3.7949,
288
- "eval_samples_per_second": 382.357,
289
- "eval_steps_per_second": 23.98,
290
  "step": 10608
291
  },
292
  {
293
  "epoch": 13.48,
294
  "learning_rate": 2.0261437908496734e-06,
295
- "loss": 0.0022,
296
  "step": 11000
297
  },
298
  {
299
  "epoch": 14.0,
300
- "eval_accuracy": 0.9776109574608192,
301
- "eval_f1": 0.9663320117662106,
302
- "eval_loss": 0.15523000061511993,
303
- "eval_precision": 0.9664247114124005,
304
- "eval_recall": 0.9662393299018511,
305
- "eval_runtime": 3.7736,
306
- "eval_samples_per_second": 384.509,
307
- "eval_steps_per_second": 24.115,
308
  "step": 11424
309
  },
310
  {
311
  "epoch": 14.09,
312
  "learning_rate": 1.2091503267973858e-06,
313
- "loss": 0.0021,
314
  "step": 11500
315
  },
316
  {
317
  "epoch": 14.71,
318
  "learning_rate": 3.921568627450981e-07,
319
- "loss": 0.0018,
320
  "step": 12000
321
  },
322
  {
323
  "epoch": 15.0,
324
- "eval_accuracy": 0.9779270380613723,
325
- "eval_f1": 0.966844646225661,
326
- "eval_loss": 0.15447764098644257,
327
- "eval_precision": 0.9669064748201439,
328
- "eval_recall": 0.9667828255379008,
329
- "eval_runtime": 3.7735,
330
- "eval_samples_per_second": 384.523,
331
- "eval_steps_per_second": 24.116,
332
  "step": 12240
333
  },
334
  {
335
  "epoch": 15.0,
336
  "step": 12240,
337
  "total_flos": 5.119363918698906e+16,
338
- "train_loss": 0.03180336131915158,
339
- "train_runtime": 835.9078,
340
- "train_samples_per_second": 234.248,
341
- "train_steps_per_second": 14.643
342
  }
343
  ],
344
  "max_steps": 12240,
 
10
  {
11
  "epoch": 0.61,
12
  "learning_rate": 1.9183006535947716e-05,
13
+ "loss": 0.4329,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "eval_accuracy": 0.9725273278019229,
19
+ "eval_f1": 0.9629807384654764,
20
+ "eval_loss": 0.10430943965911865,
21
+ "eval_precision": 0.9637516411028211,
22
+ "eval_recall": 0.9622110681287765,
23
+ "eval_runtime": 2.936,
24
+ "eval_samples_per_second": 494.214,
25
+ "eval_steps_per_second": 30.995,
26
  "step": 816
27
  },
28
  {
29
  "epoch": 1.23,
30
  "learning_rate": 1.8366013071895427e-05,
31
+ "loss": 0.0811,
32
  "step": 1000
33
  },
34
  {
35
  "epoch": 1.84,
36
  "learning_rate": 1.7549019607843138e-05,
37
+ "loss": 0.0607,
38
  "step": 1500
39
  },
40
  {
41
  "epoch": 2.0,
42
+ "eval_accuracy": 0.9751349927564862,
43
+ "eval_f1": 0.9635150914122107,
44
+ "eval_loss": 0.09400150179862976,
45
+ "eval_precision": 0.9641165172855314,
46
+ "eval_recall": 0.9629144154224879,
47
+ "eval_runtime": 2.7493,
48
+ "eval_samples_per_second": 527.777,
49
+ "eval_steps_per_second": 33.1,
50
  "step": 1632
51
  },
52
  {
53
  "epoch": 2.45,
54
  "learning_rate": 1.6732026143790852e-05,
55
+ "loss": 0.0428,
56
  "step": 2000
57
  },
58
  {
59
  "epoch": 3.0,
60
+ "eval_accuracy": 0.9755037534571316,
61
+ "eval_f1": 0.9632957125043962,
62
+ "eval_loss": 0.09555820375680923,
63
+ "eval_precision": 0.9633573141486811,
64
+ "eval_recall": 0.9632341187378113,
65
+ "eval_runtime": 2.7415,
66
+ "eval_samples_per_second": 529.269,
67
+ "eval_steps_per_second": 33.193,
68
  "step": 2448
69
  },
70
  {
71
  "epoch": 3.06,
72
  "learning_rate": 1.5915032679738563e-05,
73
+ "loss": 0.0363,
74
  "step": 2500
75
  },
76
  {
77
  "epoch": 3.68,
78
  "learning_rate": 1.5098039215686276e-05,
79
+ "loss": 0.0249,
80
  "step": 3000
81
  },
82
  {
83
  "epoch": 4.0,
84
+ "eval_accuracy": 0.9765046753588832,
85
+ "eval_f1": 0.9649540551338394,
86
+ "eval_loss": 0.0989251509308815,
87
+ "eval_precision": 0.9646919734151329,
88
+ "eval_recall": 0.9652162792928163,
89
+ "eval_runtime": 2.7759,
90
+ "eval_samples_per_second": 522.708,
91
+ "eval_steps_per_second": 32.782,
92
  "step": 3264
93
  },
94
  {
95
  "epoch": 4.29,
96
  "learning_rate": 1.4281045751633989e-05,
97
+ "loss": 0.0221,
98
  "step": 3500
99
  },
100
  {
101
  "epoch": 4.9,
102
  "learning_rate": 1.3464052287581701e-05,
103
+ "loss": 0.0177,
104
  "step": 4000
105
  },
106
  {
107
  "epoch": 5.0,
108
+ "eval_accuracy": 0.976451995258791,
109
+ "eval_f1": 0.9646675296035285,
110
+ "eval_loss": 0.10913769155740738,
111
+ "eval_precision": 0.9643747204294204,
112
+ "eval_recall": 0.9649605166405576,
113
+ "eval_runtime": 3.7551,
114
+ "eval_samples_per_second": 386.403,
115
+ "eval_steps_per_second": 24.233,
116
  "step": 4080
117
  },
118
  {
119
  "epoch": 5.51,
120
  "learning_rate": 1.2647058823529412e-05,
121
+ "loss": 0.0129,
122
  "step": 4500
123
  },
124
  {
125
  "epoch": 6.0,
126
+ "eval_accuracy": 0.9780060582115107,
127
+ "eval_f1": 0.9675017575254042,
128
+ "eval_loss": 0.11124212294816971,
129
+ "eval_precision": 0.967038231818327,
130
+ "eval_recall": 0.9679657278045973,
131
+ "eval_runtime": 3.8249,
132
+ "eval_samples_per_second": 379.357,
133
+ "eval_steps_per_second": 23.792,
134
  "step": 4896
135
  },
136
  {
137
  "epoch": 6.13,
138
  "learning_rate": 1.1830065359477125e-05,
139
+ "loss": 0.0119,
140
  "step": 5000
141
  },
142
  {
143
  "epoch": 6.74,
144
  "learning_rate": 1.1013071895424838e-05,
145
+ "loss": 0.0084,
146
  "step": 5500
147
  },
148
  {
149
  "epoch": 7.0,
150
+ "eval_accuracy": 0.9783221388120638,
151
+ "eval_f1": 0.9677996994212259,
152
+ "eval_loss": 0.12139276415109634,
153
+ "eval_precision": 0.967985415933732,
154
+ "eval_recall": 0.9676140541577416,
155
+ "eval_runtime": 3.747,
156
+ "eval_samples_per_second": 387.246,
157
+ "eval_steps_per_second": 24.286,
158
  "step": 5712
159
  },
160
  {
161
  "epoch": 7.35,
162
  "learning_rate": 1.0196078431372549e-05,
163
+ "loss": 0.0076,
164
  "step": 6000
165
  },
166
  {
167
  "epoch": 7.97,
168
  "learning_rate": 9.379084967320261e-06,
169
+ "loss": 0.0061,
170
  "step": 6500
171
  },
172
  {
173
  "epoch": 8.0,
174
+ "eval_accuracy": 0.9775319373106809,
175
+ "eval_f1": 0.9664444551020734,
176
+ "eval_loss": 0.13343702256679535,
177
+ "eval_precision": 0.9665217113257019,
178
+ "eval_recall": 0.9663672112279804,
179
+ "eval_runtime": 2.7649,
180
+ "eval_samples_per_second": 524.787,
181
+ "eval_steps_per_second": 32.912,
182
  "step": 6528
183
  },
184
  {
185
  "epoch": 8.58,
186
  "learning_rate": 8.562091503267974e-06,
187
+ "loss": 0.0048,
188
  "step": 7000
189
  },
190
  {
191
  "epoch": 9.0,
192
+ "eval_accuracy": 0.9775055972606348,
193
+ "eval_f1": 0.9661577811525858,
194
+ "eval_loss": 0.13892702758312225,
195
+ "eval_precision": 0.9662041181736795,
196
+ "eval_recall": 0.9661114485757217,
197
+ "eval_runtime": 3.8371,
198
+ "eval_samples_per_second": 378.146,
199
+ "eval_steps_per_second": 23.716,
200
  "step": 7344
201
  },
202
  {
203
  "epoch": 9.19,
204
  "learning_rate": 7.745098039215687e-06,
205
+ "loss": 0.0047,
206
  "step": 7500
207
  },
208
  {
209
  "epoch": 9.8,
210
  "learning_rate": 6.928104575163399e-06,
211
+ "loss": 0.0044,
212
  "step": 8000
213
  },
214
  {
215
  "epoch": 10.0,
216
+ "eval_accuracy": 0.9781640985117872,
217
+ "eval_f1": 0.967271564473579,
218
+ "eval_loss": 0.14489226043224335,
219
+ "eval_precision": 0.9674726540011515,
220
+ "eval_recall": 0.9670705585216919,
221
+ "eval_runtime": 3.7406,
222
+ "eval_samples_per_second": 387.909,
223
+ "eval_steps_per_second": 24.328,
224
  "step": 8160
225
  },
226
  {
227
  "epoch": 10.42,
228
  "learning_rate": 6.111111111111112e-06,
229
+ "loss": 0.0034,
230
  "step": 8500
231
  },
232
  {
233
  "epoch": 11.0,
234
+ "eval_accuracy": 0.9779797181614646,
235
+ "eval_f1": 0.9672149490880608,
236
+ "eval_loss": 0.14611582458019257,
237
+ "eval_precision": 0.9671994884910486,
238
+ "eval_recall": 0.9672304101793535,
239
+ "eval_runtime": 2.8969,
240
+ "eval_samples_per_second": 500.886,
241
+ "eval_steps_per_second": 31.413,
242
  "step": 8976
243
  },
244
  {
245
  "epoch": 11.03,
246
  "learning_rate": 5.294117647058824e-06,
247
+ "loss": 0.0034,
248
  "step": 9000
249
  },
250
  {
251
  "epoch": 11.64,
252
  "learning_rate": 4.477124183006537e-06,
253
+ "loss": 0.0025,
254
  "step": 9500
255
  },
256
  {
257
  "epoch": 12.0,
258
+ "eval_accuracy": 0.9784274990122481,
259
+ "eval_f1": 0.9676501159350764,
260
+ "eval_loss": 0.14966140687465668,
261
+ "eval_precision": 0.9680061428205784,
262
+ "eval_recall": 0.9672943508424182,
263
+ "eval_runtime": 2.7972,
264
+ "eval_samples_per_second": 518.731,
265
+ "eval_steps_per_second": 32.532,
266
  "step": 9792
267
  },
268
  {
269
  "epoch": 12.25,
270
  "learning_rate": 3.6601307189542484e-06,
271
+ "loss": 0.0022,
272
  "step": 10000
273
  },
274
  {
275
  "epoch": 12.87,
276
  "learning_rate": 2.843137254901961e-06,
277
+ "loss": 0.0024,
278
  "step": 10500
279
  },
280
  {
281
  "epoch": 13.0,
282
+ "eval_accuracy": 0.977848017911234,
283
+ "eval_f1": 0.9670199670679264,
284
+ "eval_loss": 0.15780866146087646,
285
+ "eval_precision": 0.9670972692971798,
286
+ "eval_recall": 0.9669426771955625,
287
+ "eval_runtime": 3.8111,
288
+ "eval_samples_per_second": 380.734,
289
+ "eval_steps_per_second": 23.878,
290
  "step": 10608
291
  },
292
  {
293
  "epoch": 13.48,
294
  "learning_rate": 2.0261437908496734e-06,
295
+ "loss": 0.0014,
296
  "step": 11000
297
  },
298
  {
299
  "epoch": 14.0,
300
+ "eval_accuracy": 0.978111418411695,
301
+ "eval_f1": 0.9674521213671389,
302
+ "eval_loss": 0.15866732597351074,
303
+ "eval_precision": 0.9675139888089528,
304
+ "eval_recall": 0.9673902618370153,
305
+ "eval_runtime": 2.8382,
306
+ "eval_samples_per_second": 511.243,
307
+ "eval_steps_per_second": 32.063,
308
  "step": 11424
309
  },
310
  {
311
  "epoch": 14.09,
312
  "learning_rate": 1.2091503267973858e-06,
313
+ "loss": 0.0019,
314
  "step": 11500
315
  },
316
  {
317
  "epoch": 14.71,
318
  "learning_rate": 3.921568627450981e-07,
319
+ "loss": 0.0016,
320
  "step": 12000
321
  },
322
  {
323
  "epoch": 15.0,
324
+ "eval_accuracy": 0.978401158962202,
325
+ "eval_f1": 0.96785434549785,
326
+ "eval_loss": 0.15796583890914917,
327
+ "eval_precision": 0.9678388746803069,
328
+ "eval_recall": 0.9678698168100003,
329
+ "eval_runtime": 3.7576,
330
+ "eval_samples_per_second": 386.151,
331
+ "eval_steps_per_second": 24.218,
332
  "step": 12240
333
  },
334
  {
335
  "epoch": 15.0,
336
  "step": 12240,
337
  "total_flos": 5.119363918698906e+16,
338
+ "train_loss": 0.03262665262993644,
339
+ "train_runtime": 785.1159,
340
+ "train_samples_per_second": 249.403,
341
+ "train_steps_per_second": 15.59
342
  }
343
  ],
344
  "max_steps": 12240,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28805bbddbea78cbc7984bc64d6c51834153424f84a3475453bb119c607a9e77
3
  size 3119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2ad2280933fd2dfde8ee13fc62c6b195937dddc4b2505fce56231e549d3c475
3
  size 3119