pszemraj committed on
Commit
cd04162
1 Parent(s): 64f3204

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +10 -10
  2. eval_results.json +6 -6
  3. train_results.json +5 -5
  4. trainer_state.json +323 -143
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_f1": 0.46636259977194994,
4
- "eval_loss": 0.25748351216316223,
5
- "eval_runtime": 1.8285,
6
  "eval_samples": 989,
7
- "eval_samples_per_second": 540.873,
8
- "eval_steps_per_second": 8.75,
9
- "train_loss": 0.3167446336438579,
10
- "train_runtime": 225.8975,
11
  "train_samples": 7914,
12
- "train_samples_per_second": 175.168,
13
- "train_steps_per_second": 1.372
14
  }
 
1
  {
2
+ "epoch": 4.97,
3
+ "eval_f1": 0.7058281501958075,
4
+ "eval_loss": 0.1904972940683365,
5
+ "eval_runtime": 42.3826,
6
  "eval_samples": 989,
7
+ "eval_samples_per_second": 23.335,
8
+ "eval_steps_per_second": 1.463,
9
+ "train_loss": 0.2132105562745071,
10
+ "train_runtime": 4145.08,
11
  "train_samples": 7914,
12
+ "train_samples_per_second": 9.546,
13
+ "train_steps_per_second": 0.148
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_f1": 0.46636259977194994,
4
- "eval_loss": 0.25748351216316223,
5
- "eval_runtime": 1.8285,
6
  "eval_samples": 989,
7
- "eval_samples_per_second": 540.873,
8
- "eval_steps_per_second": 8.75
9
  }
 
1
  {
2
+ "epoch": 4.97,
3
+ "eval_f1": 0.7058281501958075,
4
+ "eval_loss": 0.1904972940683365,
5
+ "eval_runtime": 42.3826,
6
  "eval_samples": 989,
7
+ "eval_samples_per_second": 23.335,
8
+ "eval_steps_per_second": 1.463
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.0,
3
- "train_loss": 0.3167446336438579,
4
- "train_runtime": 225.8975,
5
  "train_samples": 7914,
6
- "train_samples_per_second": 175.168,
7
- "train_steps_per_second": 1.372
8
  }
 
1
  {
2
+ "epoch": 4.97,
3
+ "train_loss": 0.2132105562745071,
4
+ "train_runtime": 4145.08,
5
  "train_samples": 7914,
6
+ "train_samples_per_second": 9.546,
7
+ "train_steps_per_second": 0.148
8
  }
trainer_state.json CHANGED
@@ -1,259 +1,439 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 310,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.16,
13
- "learning_rate": 1.935483870967742e-05,
14
- "loss": 0.6525,
15
  "step": 10
16
  },
17
  {
18
- "epoch": 0.32,
19
- "learning_rate": 1.870967741935484e-05,
20
- "loss": 0.5089,
21
  "step": 20
22
  },
23
  {
24
- "epoch": 0.48,
25
- "learning_rate": 1.806451612903226e-05,
26
- "loss": 0.422,
27
  "step": 30
28
  },
29
  {
30
- "epoch": 0.65,
31
- "learning_rate": 1.741935483870968e-05,
32
- "loss": 0.3803,
33
  "step": 40
34
  },
35
  {
36
- "epoch": 0.81,
37
- "learning_rate": 1.6774193548387098e-05,
38
- "loss": 0.3615,
39
  "step": 50
40
  },
41
  {
42
- "epoch": 0.97,
43
- "learning_rate": 1.6129032258064517e-05,
44
- "loss": 0.3453,
45
  "step": 60
46
  },
47
  {
48
- "epoch": 1.0,
49
- "eval_f1": 0.2621971457019582,
50
- "eval_loss": 0.3179258704185486,
51
- "eval_runtime": 1.9933,
52
- "eval_samples_per_second": 496.167,
53
- "eval_steps_per_second": 8.027,
54
- "step": 62
55
- },
56
- {
57
- "epoch": 1.13,
58
- "learning_rate": 1.5483870967741936e-05,
59
- "loss": 0.3319,
60
  "step": 70
61
  },
62
  {
63
- "epoch": 1.29,
64
- "learning_rate": 1.4838709677419357e-05,
65
- "loss": 0.3196,
66
  "step": 80
67
  },
68
  {
69
- "epoch": 1.45,
70
- "learning_rate": 1.4193548387096776e-05,
71
- "loss": 0.3168,
72
  "step": 90
73
  },
74
  {
75
- "epoch": 1.61,
76
- "learning_rate": 1.3548387096774194e-05,
77
- "loss": 0.3119,
78
  "step": 100
79
  },
80
  {
81
- "epoch": 1.77,
82
- "learning_rate": 1.2903225806451613e-05,
83
- "loss": 0.3049,
84
  "step": 110
85
  },
86
  {
87
- "epoch": 1.94,
88
- "learning_rate": 1.2258064516129034e-05,
89
- "loss": 0.3017,
90
  "step": 120
91
  },
92
  {
93
- "epoch": 2.0,
94
- "eval_f1": 0.37271619975639464,
95
- "eval_loss": 0.2839711308479309,
96
- "eval_runtime": 1.8261,
97
- "eval_samples_per_second": 541.593,
98
- "eval_steps_per_second": 8.762,
99
- "step": 124
100
  },
101
  {
102
- "epoch": 2.1,
103
- "learning_rate": 1.1612903225806453e-05,
104
- "loss": 0.2931,
105
  "step": 130
106
  },
107
  {
108
- "epoch": 2.26,
109
- "learning_rate": 1.096774193548387e-05,
110
- "loss": 0.2924,
111
  "step": 140
112
  },
113
  {
114
- "epoch": 2.42,
115
- "learning_rate": 1.0322580645161291e-05,
116
- "loss": 0.2911,
117
  "step": 150
118
  },
119
  {
120
- "epoch": 2.58,
121
- "learning_rate": 9.67741935483871e-06,
122
- "loss": 0.2857,
123
  "step": 160
124
  },
125
  {
126
- "epoch": 2.74,
127
- "learning_rate": 9.03225806451613e-06,
128
- "loss": 0.2855,
129
  "step": 170
130
  },
131
  {
132
- "epoch": 2.9,
133
- "learning_rate": 8.387096774193549e-06,
134
- "loss": 0.2828,
135
  "step": 180
136
  },
137
  {
138
- "epoch": 3.0,
139
- "eval_f1": 0.4075178997613365,
140
- "eval_loss": 0.27112114429473877,
141
- "eval_runtime": 1.8327,
142
- "eval_samples_per_second": 539.642,
143
- "eval_steps_per_second": 8.73,
144
- "step": 186
145
- },
146
- {
147
- "epoch": 3.06,
148
- "learning_rate": 7.741935483870968e-06,
149
- "loss": 0.2874,
150
  "step": 190
151
  },
152
  {
153
- "epoch": 3.23,
154
- "learning_rate": 7.096774193548388e-06,
155
- "loss": 0.2774,
156
  "step": 200
157
  },
158
  {
159
- "epoch": 3.39,
160
- "learning_rate": 6.451612903225806e-06,
161
- "loss": 0.2729,
162
  "step": 210
163
  },
164
  {
165
- "epoch": 3.55,
166
- "learning_rate": 5.806451612903226e-06,
167
- "loss": 0.2784,
168
  "step": 220
169
  },
170
  {
171
- "epoch": 3.71,
172
- "learning_rate": 5.161290322580646e-06,
173
- "loss": 0.2726,
174
  "step": 230
175
  },
176
  {
177
- "epoch": 3.87,
178
- "learning_rate": 4.516129032258065e-06,
179
- "loss": 0.2723,
180
  "step": 240
181
  },
182
  {
183
- "epoch": 4.0,
184
- "eval_f1": 0.4506226469736461,
185
- "eval_loss": 0.26035064458847046,
186
- "eval_runtime": 1.8326,
187
- "eval_samples_per_second": 539.658,
188
- "eval_steps_per_second": 8.731,
189
- "step": 248
190
  },
191
  {
192
- "epoch": 4.03,
193
- "learning_rate": 3.870967741935484e-06,
194
- "loss": 0.2688,
195
  "step": 250
196
  },
197
  {
198
- "epoch": 4.19,
199
- "learning_rate": 3.225806451612903e-06,
200
- "loss": 0.268,
201
  "step": 260
202
  },
203
  {
204
- "epoch": 4.35,
205
- "learning_rate": 2.580645161290323e-06,
206
- "loss": 0.266,
207
  "step": 270
208
  },
209
  {
210
- "epoch": 4.52,
211
- "learning_rate": 1.935483870967742e-06,
212
- "loss": 0.2657,
213
  "step": 280
214
  },
215
  {
216
- "epoch": 4.68,
217
- "learning_rate": 1.2903225806451614e-06,
218
- "loss": 0.2687,
219
  "step": 290
220
  },
221
  {
222
- "epoch": 4.84,
223
- "learning_rate": 6.451612903225807e-07,
224
- "loss": 0.2679,
225
  "step": 300
226
  },
227
  {
228
- "epoch": 5.0,
229
- "learning_rate": 0.0,
230
- "loss": 0.2653,
231
  "step": 310
232
  },
233
  {
234
- "epoch": 5.0,
235
- "eval_f1": 0.46636259977194994,
236
- "eval_loss": 0.25748351216316223,
237
- "eval_runtime": 1.8325,
238
- "eval_samples_per_second": 539.71,
239
- "eval_steps_per_second": 8.731,
240
- "step": 310
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  },
242
  {
243
- "epoch": 5.0,
244
- "step": 310,
245
- "total_flos": 5243230627246080.0,
246
- "train_loss": 0.3167446336438579,
247
- "train_runtime": 225.8975,
248
- "train_samples_per_second": 175.168,
249
- "train_steps_per_second": 1.372
250
  }
251
  ],
252
  "logging_steps": 10,
253
- "max_steps": 310,
254
  "num_train_epochs": 5,
255
  "save_steps": 500,
256
- "total_flos": 5243230627246080.0,
257
  "trial_name": null,
258
  "trial_params": null
259
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.96969696969697,
5
  "eval_steps": 500,
6
+ "global_step": 615,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.08,
13
+ "learning_rate": 1.9674796747967483e-05,
14
+ "loss": 0.6473,
15
  "step": 10
16
  },
17
  {
18
+ "epoch": 0.16,
19
+ "learning_rate": 1.934959349593496e-05,
20
+ "loss": 0.414,
21
  "step": 20
22
  },
23
  {
24
+ "epoch": 0.24,
25
+ "learning_rate": 1.902439024390244e-05,
26
+ "loss": 0.3542,
27
  "step": 30
28
  },
29
  {
30
+ "epoch": 0.32,
31
+ "learning_rate": 1.869918699186992e-05,
32
+ "loss": 0.3396,
33
  "step": 40
34
  },
35
  {
36
+ "epoch": 0.4,
37
+ "learning_rate": 1.83739837398374e-05,
38
+ "loss": 0.3274,
39
  "step": 50
40
  },
41
  {
42
+ "epoch": 0.48,
43
+ "learning_rate": 1.804878048780488e-05,
44
+ "loss": 0.3238,
45
  "step": 60
46
  },
47
  {
48
+ "epoch": 0.57,
49
+ "learning_rate": 1.772357723577236e-05,
50
+ "loss": 0.3495,
 
 
 
 
 
 
 
 
 
51
  "step": 70
52
  },
53
  {
54
+ "epoch": 0.65,
55
+ "learning_rate": 1.739837398373984e-05,
56
+ "loss": 0.3068,
57
  "step": 80
58
  },
59
  {
60
+ "epoch": 0.73,
61
+ "learning_rate": 1.7073170731707317e-05,
62
+ "loss": 0.3164,
63
  "step": 90
64
  },
65
  {
66
+ "epoch": 0.81,
67
+ "learning_rate": 1.6747967479674798e-05,
68
+ "loss": 0.2989,
69
  "step": 100
70
  },
71
  {
72
+ "epoch": 0.89,
73
+ "learning_rate": 1.642276422764228e-05,
74
+ "loss": 0.287,
75
  "step": 110
76
  },
77
  {
78
+ "epoch": 0.97,
79
+ "learning_rate": 1.6097560975609757e-05,
80
+ "loss": 0.2903,
81
  "step": 120
82
  },
83
  {
84
+ "epoch": 0.99,
85
+ "eval_f1": 0.40107752170008987,
86
+ "eval_loss": 0.26861757040023804,
87
+ "eval_runtime": 43.0151,
88
+ "eval_samples_per_second": 22.992,
89
+ "eval_steps_per_second": 1.441,
90
+ "step": 123
91
  },
92
  {
93
+ "epoch": 1.05,
94
+ "learning_rate": 1.5772357723577235e-05,
95
+ "loss": 0.2869,
96
  "step": 130
97
  },
98
  {
99
+ "epoch": 1.13,
100
+ "learning_rate": 1.5447154471544717e-05,
101
+ "loss": 0.2709,
102
  "step": 140
103
  },
104
  {
105
+ "epoch": 1.21,
106
+ "learning_rate": 1.5121951219512196e-05,
107
+ "loss": 0.2599,
108
  "step": 150
109
  },
110
  {
111
+ "epoch": 1.29,
112
+ "learning_rate": 1.4796747967479676e-05,
113
+ "loss": 0.2565,
114
  "step": 160
115
  },
116
  {
117
+ "epoch": 1.37,
118
+ "learning_rate": 1.4471544715447157e-05,
119
+ "loss": 0.2597,
120
  "step": 170
121
  },
122
  {
123
+ "epoch": 1.45,
124
+ "learning_rate": 1.4146341463414635e-05,
125
+ "loss": 0.2525,
126
  "step": 180
127
  },
128
  {
129
+ "epoch": 1.54,
130
+ "learning_rate": 1.3821138211382115e-05,
131
+ "loss": 0.2519,
 
 
 
 
 
 
 
 
 
132
  "step": 190
133
  },
134
  {
135
+ "epoch": 1.62,
136
+ "learning_rate": 1.3495934959349594e-05,
137
+ "loss": 0.2366,
138
  "step": 200
139
  },
140
  {
141
+ "epoch": 1.7,
142
+ "learning_rate": 1.3170731707317076e-05,
143
+ "loss": 0.2292,
144
  "step": 210
145
  },
146
  {
147
+ "epoch": 1.78,
148
+ "learning_rate": 1.2845528455284555e-05,
149
+ "loss": 0.2214,
150
  "step": 220
151
  },
152
  {
153
+ "epoch": 1.86,
154
+ "learning_rate": 1.2520325203252033e-05,
155
+ "loss": 0.2193,
156
  "step": 230
157
  },
158
  {
159
+ "epoch": 1.94,
160
+ "learning_rate": 1.2195121951219513e-05,
161
+ "loss": 0.2171,
162
  "step": 240
163
  },
164
  {
165
+ "epoch": 2.0,
166
+ "eval_f1": 0.6493130874909617,
167
+ "eval_loss": 0.2168290913105011,
168
+ "eval_runtime": 43.1471,
169
+ "eval_samples_per_second": 22.922,
170
+ "eval_steps_per_second": 1.437,
171
+ "step": 247
172
  },
173
  {
174
+ "epoch": 2.02,
175
+ "learning_rate": 1.1869918699186992e-05,
176
+ "loss": 0.1994,
177
  "step": 250
178
  },
179
  {
180
+ "epoch": 2.1,
181
+ "learning_rate": 1.1544715447154474e-05,
182
+ "loss": 0.204,
183
  "step": 260
184
  },
185
  {
186
+ "epoch": 2.18,
187
+ "learning_rate": 1.1219512195121953e-05,
188
+ "loss": 0.1909,
189
  "step": 270
190
  },
191
  {
192
+ "epoch": 2.26,
193
+ "learning_rate": 1.0894308943089431e-05,
194
+ "loss": 0.1906,
195
  "step": 280
196
  },
197
  {
198
+ "epoch": 2.34,
199
+ "learning_rate": 1.0569105691056911e-05,
200
+ "loss": 0.1919,
201
  "step": 290
202
  },
203
  {
204
+ "epoch": 2.42,
205
+ "learning_rate": 1.024390243902439e-05,
206
+ "loss": 0.1825,
207
  "step": 300
208
  },
209
  {
210
+ "epoch": 2.51,
211
+ "learning_rate": 9.91869918699187e-06,
212
+ "loss": 0.1839,
213
  "step": 310
214
  },
215
  {
216
+ "epoch": 2.59,
217
+ "learning_rate": 9.59349593495935e-06,
218
+ "loss": 0.189,
219
+ "step": 320
220
+ },
221
+ {
222
+ "epoch": 2.67,
223
+ "learning_rate": 9.268292682926831e-06,
224
+ "loss": 0.178,
225
+ "step": 330
226
+ },
227
+ {
228
+ "epoch": 2.75,
229
+ "learning_rate": 8.94308943089431e-06,
230
+ "loss": 0.1903,
231
+ "step": 340
232
+ },
233
+ {
234
+ "epoch": 2.83,
235
+ "learning_rate": 8.617886178861789e-06,
236
+ "loss": 0.1835,
237
+ "step": 350
238
+ },
239
+ {
240
+ "epoch": 2.91,
241
+ "learning_rate": 8.292682926829268e-06,
242
+ "loss": 0.1722,
243
+ "step": 360
244
+ },
245
+ {
246
+ "epoch": 2.99,
247
+ "learning_rate": 7.967479674796748e-06,
248
+ "loss": 0.1879,
249
+ "step": 370
250
+ },
251
+ {
252
+ "epoch": 3.0,
253
+ "eval_f1": 0.6612224696857214,
254
+ "eval_loss": 0.19899873435497284,
255
+ "eval_runtime": 43.1679,
256
+ "eval_samples_per_second": 22.911,
257
+ "eval_steps_per_second": 1.436,
258
+ "step": 371
259
+ },
260
+ {
261
+ "epoch": 3.07,
262
+ "learning_rate": 7.64227642276423e-06,
263
+ "loss": 0.1593,
264
+ "step": 380
265
+ },
266
+ {
267
+ "epoch": 3.15,
268
+ "learning_rate": 7.317073170731707e-06,
269
+ "loss": 0.1553,
270
+ "step": 390
271
+ },
272
+ {
273
+ "epoch": 3.23,
274
+ "learning_rate": 6.991869918699188e-06,
275
+ "loss": 0.1529,
276
+ "step": 400
277
+ },
278
+ {
279
+ "epoch": 3.31,
280
+ "learning_rate": 6.666666666666667e-06,
281
+ "loss": 0.1589,
282
+ "step": 410
283
+ },
284
+ {
285
+ "epoch": 3.39,
286
+ "learning_rate": 6.341463414634147e-06,
287
+ "loss": 0.1514,
288
+ "step": 420
289
+ },
290
+ {
291
+ "epoch": 3.47,
292
+ "learning_rate": 6.016260162601627e-06,
293
+ "loss": 0.1524,
294
+ "step": 430
295
+ },
296
+ {
297
+ "epoch": 3.56,
298
+ "learning_rate": 5.691056910569106e-06,
299
+ "loss": 0.1577,
300
+ "step": 440
301
+ },
302
+ {
303
+ "epoch": 3.64,
304
+ "learning_rate": 5.365853658536586e-06,
305
+ "loss": 0.1504,
306
+ "step": 450
307
+ },
308
+ {
309
+ "epoch": 3.72,
310
+ "learning_rate": 5.040650406504065e-06,
311
+ "loss": 0.151,
312
+ "step": 460
313
+ },
314
+ {
315
+ "epoch": 3.8,
316
+ "learning_rate": 4.715447154471545e-06,
317
+ "loss": 0.1539,
318
+ "step": 470
319
+ },
320
+ {
321
+ "epoch": 3.88,
322
+ "learning_rate": 4.390243902439025e-06,
323
+ "loss": 0.1575,
324
+ "step": 480
325
+ },
326
+ {
327
+ "epoch": 3.96,
328
+ "learning_rate": 4.0650406504065046e-06,
329
+ "loss": 0.1476,
330
+ "step": 490
331
+ },
332
+ {
333
+ "epoch": 4.0,
334
+ "eval_f1": 0.7060445874511607,
335
+ "eval_loss": 0.18790604174137115,
336
+ "eval_runtime": 43.1492,
337
+ "eval_samples_per_second": 22.92,
338
+ "eval_steps_per_second": 1.437,
339
+ "step": 495
340
+ },
341
+ {
342
+ "epoch": 4.04,
343
+ "learning_rate": 3.7398373983739838e-06,
344
+ "loss": 0.1412,
345
+ "step": 500
346
+ },
347
+ {
348
+ "epoch": 4.12,
349
+ "learning_rate": 3.414634146341464e-06,
350
+ "loss": 0.1274,
351
+ "step": 510
352
+ },
353
+ {
354
+ "epoch": 4.2,
355
+ "learning_rate": 3.0894308943089435e-06,
356
+ "loss": 0.1266,
357
+ "step": 520
358
+ },
359
+ {
360
+ "epoch": 4.28,
361
+ "learning_rate": 2.764227642276423e-06,
362
+ "loss": 0.1275,
363
+ "step": 530
364
+ },
365
+ {
366
+ "epoch": 4.36,
367
+ "learning_rate": 2.4390243902439027e-06,
368
+ "loss": 0.1222,
369
+ "step": 540
370
+ },
371
+ {
372
+ "epoch": 4.44,
373
+ "learning_rate": 2.1138211382113824e-06,
374
+ "loss": 0.1275,
375
+ "step": 550
376
+ },
377
+ {
378
+ "epoch": 4.53,
379
+ "learning_rate": 1.788617886178862e-06,
380
+ "loss": 0.1248,
381
+ "step": 560
382
+ },
383
+ {
384
+ "epoch": 4.61,
385
+ "learning_rate": 1.4634146341463414e-06,
386
+ "loss": 0.1308,
387
+ "step": 570
388
+ },
389
+ {
390
+ "epoch": 4.69,
391
+ "learning_rate": 1.1382113821138213e-06,
392
+ "loss": 0.1272,
393
+ "step": 580
394
+ },
395
+ {
396
+ "epoch": 4.77,
397
+ "learning_rate": 8.130081300813009e-07,
398
+ "loss": 0.1261,
399
+ "step": 590
400
+ },
401
+ {
402
+ "epoch": 4.85,
403
+ "learning_rate": 4.878048780487805e-07,
404
+ "loss": 0.1289,
405
+ "step": 600
406
+ },
407
+ {
408
+ "epoch": 4.93,
409
+ "learning_rate": 1.6260162601626018e-07,
410
+ "loss": 0.1279,
411
+ "step": 610
412
+ },
413
+ {
414
+ "epoch": 4.97,
415
+ "eval_f1": 0.7058281501958075,
416
+ "eval_loss": 0.1904972940683365,
417
+ "eval_runtime": 43.1284,
418
+ "eval_samples_per_second": 22.932,
419
+ "eval_steps_per_second": 1.438,
420
+ "step": 615
421
  },
422
  {
423
+ "epoch": 4.97,
424
+ "step": 615,
425
+ "total_flos": 2.6435427934519296e+16,
426
+ "train_loss": 0.2132105562745071,
427
+ "train_runtime": 4145.08,
428
+ "train_samples_per_second": 9.546,
429
+ "train_steps_per_second": 0.148
430
  }
431
  ],
432
  "logging_steps": 10,
433
+ "max_steps": 615,
434
  "num_train_epochs": 5,
435
  "save_steps": 500,
436
+ "total_flos": 2.6435427934519296e+16,
437
  "trial_name": null,
438
  "trial_params": null
439
  }