josesantorcuato committed on
Commit
a483a91
1 Parent(s): 0706443

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.04375,
3
- "eval_accuracy": 0.5714285714285714,
4
- "eval_loss": 1.1454538106918335,
5
- "eval_runtime": 3.2395,
6
- "eval_samples_per_second": 8.643,
7
- "eval_steps_per_second": 1.235
8
  }
 
1
  {
2
+ "epoch": 9.1,
3
+ "eval_accuracy": 0.7857142857142857,
4
+ "eval_loss": 0.7852017283439636,
5
+ "eval_runtime": 2.5877,
6
+ "eval_samples_per_second": 10.821,
7
+ "eval_steps_per_second": 1.546
8
  }
runs/Nov05_16-00-57_612d7c91d57a/events.out.tfevents.1730823607.612d7c91d57a.6074.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d359571268aea4298fa123d2911dfe48b114ebeacb8c3d0a542358a79411841c
3
- size 411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52f84c93dfaf724cdd68880343a510e6f66188001bfab279009ee7e7c971a2af
3
+ size 734
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.04375,
3
- "eval_accuracy": 0.5714285714285714,
4
- "eval_loss": 1.1454538106918335,
5
- "eval_runtime": 3.2395,
6
- "eval_samples_per_second": 8.643,
7
- "eval_steps_per_second": 1.235
8
  }
 
1
  {
2
+ "epoch": 9.1,
3
+ "eval_accuracy": 0.7857142857142857,
4
+ "eval_loss": 0.7852017283439636,
5
+ "eval_runtime": 2.5877,
6
+ "eval_samples_per_second": 10.821,
7
+ "eval_steps_per_second": 1.546
8
  }
trainer_state.json CHANGED
@@ -1,245 +1,399 @@
1
  {
2
- "best_metric": 0.6428571428571429,
3
- "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-153",
4
- "epoch": 9.04375,
5
  "eval_steps": 500,
6
- "global_step": 160,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0625,
13
- "grad_norm": 8.446556091308594,
14
- "learning_rate": 3.125e-05,
15
- "loss": 1.4262,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.10625,
20
- "eval_accuracy": 0.35714285714285715,
21
- "eval_loss": 1.4080798625946045,
22
- "eval_runtime": 4.4913,
23
- "eval_samples_per_second": 3.117,
24
- "eval_steps_per_second": 0.445,
25
- "step": 17
26
- },
27
- {
28
- "epoch": 1.01875,
29
- "grad_norm": 9.888665199279785,
30
- "learning_rate": 4.8611111111111115e-05,
31
- "loss": 1.3656,
32
  "step": 20
33
  },
34
  {
35
- "epoch": 1.08125,
36
- "grad_norm": 5.500328540802002,
37
- "learning_rate": 4.5138888888888894e-05,
38
- "loss": 1.3918,
39
  "step": 30
40
  },
41
  {
42
- "epoch": 1.10625,
43
- "eval_accuracy": 0.21428571428571427,
44
- "eval_loss": 1.579779863357544,
45
- "eval_runtime": 1.2328,
46
- "eval_samples_per_second": 11.357,
47
- "eval_steps_per_second": 1.622,
48
- "step": 34
49
  },
50
  {
51
- "epoch": 2.0375,
52
- "grad_norm": 6.509393692016602,
53
- "learning_rate": 4.166666666666667e-05,
54
- "loss": 1.3092,
55
  "step": 40
56
  },
57
  {
58
- "epoch": 2.1,
59
- "grad_norm": 9.822772979736328,
60
- "learning_rate": 3.8194444444444444e-05,
61
- "loss": 1.2887,
62
  "step": 50
63
  },
64
  {
65
- "epoch": 2.10625,
66
- "eval_accuracy": 0.5,
67
- "eval_loss": 1.3649126291275024,
68
- "eval_runtime": 1.2619,
69
- "eval_samples_per_second": 11.095,
70
- "eval_steps_per_second": 1.585,
71
- "step": 51
72
  },
73
  {
74
- "epoch": 3.05625,
75
- "grad_norm": 7.699404239654541,
76
- "learning_rate": 3.472222222222222e-05,
77
- "loss": 1.3115,
78
- "step": 60
79
  },
80
  {
81
- "epoch": 3.10625,
82
  "eval_accuracy": 0.42857142857142855,
83
- "eval_loss": 1.4629420042037964,
84
- "eval_runtime": 1.2724,
85
- "eval_samples_per_second": 11.003,
86
- "eval_steps_per_second": 1.572,
87
- "step": 68
88
  },
89
  {
90
- "epoch": 4.0125,
91
- "grad_norm": 6.042768955230713,
92
- "learning_rate": 3.125e-05,
93
- "loss": 1.1749,
94
- "step": 70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  },
96
  {
97
- "epoch": 4.075,
98
- "grad_norm": 6.405767917633057,
 
 
 
 
 
 
 
99
  "learning_rate": 2.777777777777778e-05,
100
- "loss": 1.0533,
101
- "step": 80
102
  },
103
  {
104
- "epoch": 4.10625,
105
- "eval_accuracy": 0.2857142857142857,
106
- "eval_loss": 1.5868721008300781,
107
- "eval_runtime": 1.3163,
108
- "eval_samples_per_second": 10.636,
109
- "eval_steps_per_second": 1.519,
110
- "step": 85
111
  },
112
  {
113
- "epoch": 5.03125,
114
- "grad_norm": 5.696847915649414,
115
- "learning_rate": 2.4305555555555558e-05,
116
- "loss": 1.1042,
117
- "step": 90
118
  },
119
  {
120
- "epoch": 5.09375,
121
- "grad_norm": 9.622387886047363,
122
- "learning_rate": 2.0833333333333336e-05,
123
- "loss": 0.9616,
124
- "step": 100
125
  },
126
  {
127
- "epoch": 5.10625,
128
- "eval_accuracy": 0.35714285714285715,
129
- "eval_loss": 1.7206089496612549,
130
- "eval_runtime": 1.3192,
131
- "eval_samples_per_second": 10.612,
132
- "eval_steps_per_second": 1.516,
133
- "step": 102
134
  },
135
  {
136
- "epoch": 6.05,
137
- "grad_norm": 10.931577682495117,
138
- "learning_rate": 1.736111111111111e-05,
139
- "loss": 0.9531,
140
- "step": 110
 
 
141
  },
142
  {
143
- "epoch": 6.10625,
144
- "eval_accuracy": 0.35714285714285715,
145
- "eval_loss": 1.440622329711914,
146
- "eval_runtime": 1.1712,
147
- "eval_samples_per_second": 11.954,
148
- "eval_steps_per_second": 1.708,
149
- "step": 119
150
  },
151
  {
152
- "epoch": 7.00625,
153
- "grad_norm": 12.279280662536621,
154
- "learning_rate": 1.388888888888889e-05,
155
- "loss": 0.7034,
156
- "step": 120
157
  },
158
  {
159
- "epoch": 7.06875,
160
- "grad_norm": 15.174863815307617,
161
- "learning_rate": 1.0416666666666668e-05,
162
- "loss": 0.6531,
163
- "step": 130
164
  },
165
  {
166
- "epoch": 7.10625,
167
- "eval_accuracy": 0.5,
168
- "eval_loss": 1.329153299331665,
169
- "eval_runtime": 1.1705,
170
- "eval_samples_per_second": 11.96,
171
- "eval_steps_per_second": 1.709,
172
- "step": 136
173
  },
174
  {
175
- "epoch": 8.025,
176
- "grad_norm": 12.259533882141113,
177
- "learning_rate": 6.944444444444445e-06,
178
- "loss": 0.6896,
179
- "step": 140
 
 
180
  },
181
  {
182
- "epoch": 8.0875,
183
- "grad_norm": 14.428750991821289,
184
- "learning_rate": 3.4722222222222224e-06,
185
- "loss": 0.5778,
186
- "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  },
188
  {
189
- "epoch": 8.10625,
190
- "eval_accuracy": 0.6428571428571429,
191
- "eval_loss": 1.121216058731079,
192
- "eval_runtime": 1.0823,
193
- "eval_samples_per_second": 12.936,
194
- "eval_steps_per_second": 1.848,
195
- "step": 153
196
  },
197
  {
198
- "epoch": 9.04375,
199
- "grad_norm": 12.301226615905762,
200
  "learning_rate": 0.0,
201
- "loss": 0.4835,
202
- "step": 160
203
  },
204
  {
205
- "epoch": 9.04375,
206
- "eval_accuracy": 0.5714285714285714,
207
- "eval_loss": 1.113793134689331,
208
- "eval_runtime": 1.2842,
209
- "eval_samples_per_second": 10.902,
210
- "eval_steps_per_second": 1.557,
211
- "step": 160
212
  },
213
  {
214
- "epoch": 9.04375,
215
- "step": 160,
216
- "total_flos": 1.5277024347362427e+18,
217
- "train_loss": 1.0279614835977555,
218
- "train_runtime": 373.3169,
219
- "train_samples_per_second": 3.429,
220
- "train_steps_per_second": 0.429
221
  },
222
  {
223
- "epoch": 9.04375,
224
- "eval_accuracy": 0.5714285714285714,
225
- "eval_loss": 1.1454538106918335,
226
- "eval_runtime": 10.4142,
227
- "eval_samples_per_second": 2.689,
228
- "eval_steps_per_second": 0.384,
229
- "step": 160
230
  },
231
  {
232
- "epoch": 9.04375,
233
- "eval_accuracy": 0.5714285714285714,
234
- "eval_loss": 1.1454538106918335,
235
- "eval_runtime": 3.2395,
236
- "eval_samples_per_second": 8.643,
237
- "eval_steps_per_second": 1.235,
238
- "step": 160
239
  }
240
  ],
241
  "logging_steps": 10,
242
- "max_steps": 160,
243
  "num_input_tokens_seen": 0,
244
  "num_train_epochs": 9223372036854775807,
245
  "save_steps": 500,
@@ -255,7 +409,7 @@
255
  "attributes": {}
256
  }
257
  },
258
- "total_flos": 1.5277024347362427e+18,
259
  "train_batch_size": 8,
260
  "trial_name": null,
261
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.0,
3
+ "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-266",
4
+ "epoch": 9.1,
5
  "eval_steps": 500,
6
+ "global_step": 380,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.02631578947368421,
13
+ "grad_norm": 7.891677379608154,
14
+ "learning_rate": 1.3157894736842106e-05,
15
+ "loss": 1.3956,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.05263157894736842,
20
+ "grad_norm": 9.808924674987793,
21
+ "learning_rate": 2.6315789473684212e-05,
22
+ "loss": 1.3551,
 
 
 
 
 
 
 
 
 
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 0.07894736842105263,
27
+ "grad_norm": 11.947072982788086,
28
+ "learning_rate": 3.9473684210526316e-05,
29
+ "loss": 1.3762,
30
  "step": 30
31
  },
32
  {
33
+ "epoch": 0.1,
34
+ "eval_accuracy": 0.2857142857142857,
35
+ "eval_loss": 1.4489834308624268,
36
+ "eval_runtime": 6.6934,
37
+ "eval_samples_per_second": 2.092,
38
+ "eval_steps_per_second": 0.299,
39
+ "step": 38
40
  },
41
  {
42
+ "epoch": 1.0052631578947369,
43
+ "grad_norm": 9.198051452636719,
44
+ "learning_rate": 4.970760233918128e-05,
45
+ "loss": 1.3077,
46
  "step": 40
47
  },
48
  {
49
+ "epoch": 1.0315789473684212,
50
+ "grad_norm": 8.386839866638184,
51
+ "learning_rate": 4.824561403508772e-05,
52
+ "loss": 1.3181,
53
  "step": 50
54
  },
55
  {
56
+ "epoch": 1.0578947368421052,
57
+ "grad_norm": 7.219974040985107,
58
+ "learning_rate": 4.678362573099415e-05,
59
+ "loss": 1.3483,
60
+ "step": 60
 
 
61
  },
62
  {
63
+ "epoch": 1.0842105263157895,
64
+ "grad_norm": 8.174283027648926,
65
+ "learning_rate": 4.5321637426900585e-05,
66
+ "loss": 1.2421,
67
+ "step": 70
68
  },
69
  {
70
+ "epoch": 1.1,
71
  "eval_accuracy": 0.42857142857142855,
72
+ "eval_loss": 1.3189738988876343,
73
+ "eval_runtime": 1.1969,
74
+ "eval_samples_per_second": 11.696,
75
+ "eval_steps_per_second": 1.671,
76
+ "step": 76
77
  },
78
  {
79
+ "epoch": 2.0105263157894737,
80
+ "grad_norm": 8.107172012329102,
81
+ "learning_rate": 4.3859649122807014e-05,
82
+ "loss": 1.192,
83
+ "step": 80
84
+ },
85
+ {
86
+ "epoch": 2.036842105263158,
87
+ "grad_norm": 4.386706352233887,
88
+ "learning_rate": 4.239766081871345e-05,
89
+ "loss": 1.1036,
90
+ "step": 90
91
+ },
92
+ {
93
+ "epoch": 2.0631578947368423,
94
+ "grad_norm": 22.933664321899414,
95
+ "learning_rate": 4.093567251461988e-05,
96
+ "loss": 0.9115,
97
+ "step": 100
98
+ },
99
+ {
100
+ "epoch": 2.0894736842105264,
101
+ "grad_norm": 8.911148071289062,
102
+ "learning_rate": 3.9473684210526316e-05,
103
+ "loss": 0.8753,
104
+ "step": 110
105
+ },
106
+ {
107
+ "epoch": 2.1,
108
+ "eval_accuracy": 0.5714285714285714,
109
+ "eval_loss": 0.9505947828292847,
110
+ "eval_runtime": 1.3125,
111
+ "eval_samples_per_second": 10.667,
112
+ "eval_steps_per_second": 1.524,
113
+ "step": 114
114
+ },
115
+ {
116
+ "epoch": 3.0157894736842104,
117
+ "grad_norm": 12.133541107177734,
118
+ "learning_rate": 3.8011695906432746e-05,
119
+ "loss": 0.6292,
120
+ "step": 120
121
+ },
122
+ {
123
+ "epoch": 3.042105263157895,
124
+ "grad_norm": 3.894150495529175,
125
+ "learning_rate": 3.654970760233918e-05,
126
+ "loss": 0.4353,
127
+ "step": 130
128
+ },
129
+ {
130
+ "epoch": 3.068421052631579,
131
+ "grad_norm": 16.60247039794922,
132
+ "learning_rate": 3.508771929824561e-05,
133
+ "loss": 0.6194,
134
+ "step": 140
135
+ },
136
+ {
137
+ "epoch": 3.094736842105263,
138
+ "grad_norm": 9.527063369750977,
139
+ "learning_rate": 3.362573099415205e-05,
140
+ "loss": 0.4285,
141
+ "step": 150
142
+ },
143
+ {
144
+ "epoch": 3.1,
145
+ "eval_accuracy": 0.7857142857142857,
146
+ "eval_loss": 0.5579931139945984,
147
+ "eval_runtime": 1.2045,
148
+ "eval_samples_per_second": 11.623,
149
+ "eval_steps_per_second": 1.66,
150
+ "step": 152
151
+ },
152
+ {
153
+ "epoch": 4.021052631578947,
154
+ "grad_norm": 10.711992263793945,
155
+ "learning_rate": 3.216374269005848e-05,
156
+ "loss": 0.2534,
157
+ "step": 160
158
+ },
159
+ {
160
+ "epoch": 4.0473684210526315,
161
+ "grad_norm": 0.5280627012252808,
162
+ "learning_rate": 3.0701754385964913e-05,
163
+ "loss": 0.1613,
164
+ "step": 170
165
  },
166
  {
167
+ "epoch": 4.073684210526316,
168
+ "grad_norm": 6.357590675354004,
169
+ "learning_rate": 2.9239766081871346e-05,
170
+ "loss": 0.2539,
171
+ "step": 180
172
+ },
173
+ {
174
+ "epoch": 4.1,
175
+ "grad_norm": 10.521815299987793,
176
  "learning_rate": 2.777777777777778e-05,
177
+ "loss": 0.3808,
178
+ "step": 190
179
  },
180
  {
181
+ "epoch": 4.1,
182
+ "eval_accuracy": 0.8571428571428571,
183
+ "eval_loss": 0.49507784843444824,
184
+ "eval_runtime": 1.2129,
185
+ "eval_samples_per_second": 11.543,
186
+ "eval_steps_per_second": 1.649,
187
+ "step": 190
188
  },
189
  {
190
+ "epoch": 5.026315789473684,
191
+ "grad_norm": 0.37827879190444946,
192
+ "learning_rate": 2.6315789473684212e-05,
193
+ "loss": 0.0941,
194
+ "step": 200
195
  },
196
  {
197
+ "epoch": 5.052631578947368,
198
+ "grad_norm": 15.41084098815918,
199
+ "learning_rate": 2.485380116959064e-05,
200
+ "loss": 0.1117,
201
+ "step": 210
202
  },
203
  {
204
+ "epoch": 5.078947368421052,
205
+ "grad_norm": 16.81968879699707,
206
+ "learning_rate": 2.3391812865497074e-05,
207
+ "loss": 0.1368,
208
+ "step": 220
 
 
209
  },
210
  {
211
+ "epoch": 5.1,
212
+ "eval_accuracy": 0.9285714285714286,
213
+ "eval_loss": 0.1577732115983963,
214
+ "eval_runtime": 1.1295,
215
+ "eval_samples_per_second": 12.395,
216
+ "eval_steps_per_second": 1.771,
217
+ "step": 228
218
  },
219
  {
220
+ "epoch": 6.005263157894737,
221
+ "grad_norm": 1.9107282161712646,
222
+ "learning_rate": 2.1929824561403507e-05,
223
+ "loss": 0.1195,
224
+ "step": 230
 
 
225
  },
226
  {
227
+ "epoch": 6.031578947368421,
228
+ "grad_norm": 0.2877643406391144,
229
+ "learning_rate": 2.046783625730994e-05,
230
+ "loss": 0.074,
231
+ "step": 240
232
  },
233
  {
234
+ "epoch": 6.057894736842106,
235
+ "grad_norm": 15.423999786376953,
236
+ "learning_rate": 1.9005847953216373e-05,
237
+ "loss": 0.3348,
238
+ "step": 250
239
  },
240
  {
241
+ "epoch": 6.08421052631579,
242
+ "grad_norm": 13.305224418640137,
243
+ "learning_rate": 1.7543859649122806e-05,
244
+ "loss": 0.043,
245
+ "step": 260
 
 
246
  },
247
  {
248
+ "epoch": 6.1,
249
+ "eval_accuracy": 1.0,
250
+ "eval_loss": 0.04753781110048294,
251
+ "eval_runtime": 1.1871,
252
+ "eval_samples_per_second": 11.794,
253
+ "eval_steps_per_second": 1.685,
254
+ "step": 266
255
  },
256
  {
257
+ "epoch": 7.010526315789473,
258
+ "grad_norm": 1.7253011465072632,
259
+ "learning_rate": 1.608187134502924e-05,
260
+ "loss": 0.2052,
261
+ "step": 270
262
+ },
263
+ {
264
+ "epoch": 7.036842105263158,
265
+ "grad_norm": 0.0725923627614975,
266
+ "learning_rate": 1.4619883040935673e-05,
267
+ "loss": 0.0899,
268
+ "step": 280
269
+ },
270
+ {
271
+ "epoch": 7.063157894736842,
272
+ "grad_norm": 0.664633572101593,
273
+ "learning_rate": 1.3157894736842106e-05,
274
+ "loss": 0.0753,
275
+ "step": 290
276
+ },
277
+ {
278
+ "epoch": 7.089473684210526,
279
+ "grad_norm": 0.12937086820602417,
280
+ "learning_rate": 1.1695906432748537e-05,
281
+ "loss": 0.0842,
282
+ "step": 300
283
+ },
284
+ {
285
+ "epoch": 7.1,
286
+ "eval_accuracy": 1.0,
287
+ "eval_loss": 0.06243452429771423,
288
+ "eval_runtime": 1.1818,
289
+ "eval_samples_per_second": 11.847,
290
+ "eval_steps_per_second": 1.692,
291
+ "step": 304
292
+ },
293
+ {
294
+ "epoch": 8.01578947368421,
295
+ "grad_norm": 0.04959488287568092,
296
+ "learning_rate": 1.023391812865497e-05,
297
+ "loss": 0.0558,
298
+ "step": 310
299
+ },
300
+ {
301
+ "epoch": 8.042105263157895,
302
+ "grad_norm": 0.07736323028802872,
303
+ "learning_rate": 8.771929824561403e-06,
304
+ "loss": 0.004,
305
+ "step": 320
306
+ },
307
+ {
308
+ "epoch": 8.06842105263158,
309
+ "grad_norm": 0.037877339869737625,
310
+ "learning_rate": 7.3099415204678366e-06,
311
+ "loss": 0.0784,
312
+ "step": 330
313
+ },
314
+ {
315
+ "epoch": 8.094736842105263,
316
+ "grad_norm": 0.04343694821000099,
317
+ "learning_rate": 5.8479532163742686e-06,
318
+ "loss": 0.003,
319
+ "step": 340
320
+ },
321
+ {
322
+ "epoch": 8.1,
323
+ "eval_accuracy": 1.0,
324
+ "eval_loss": 0.05573272332549095,
325
+ "eval_runtime": 1.2047,
326
+ "eval_samples_per_second": 11.621,
327
+ "eval_steps_per_second": 1.66,
328
+ "step": 342
329
+ },
330
+ {
331
+ "epoch": 9.021052631578947,
332
+ "grad_norm": 0.037801120430231094,
333
+ "learning_rate": 4.3859649122807014e-06,
334
+ "loss": 0.0041,
335
+ "step": 350
336
+ },
337
+ {
338
+ "epoch": 9.047368421052632,
339
+ "grad_norm": 0.3717033863067627,
340
+ "learning_rate": 2.9239766081871343e-06,
341
+ "loss": 0.0051,
342
+ "step": 360
343
  },
344
  {
345
+ "epoch": 9.073684210526316,
346
+ "grad_norm": 0.030566079542040825,
347
+ "learning_rate": 1.4619883040935671e-06,
348
+ "loss": 0.0034,
349
+ "step": 370
 
 
350
  },
351
  {
352
+ "epoch": 9.1,
353
+ "grad_norm": 0.05392751097679138,
354
  "learning_rate": 0.0,
355
+ "loss": 0.0828,
356
+ "step": 380
357
  },
358
  {
359
+ "epoch": 9.1,
360
+ "eval_accuracy": 1.0,
361
+ "eval_loss": 0.04455011337995529,
362
+ "eval_runtime": 1.3077,
363
+ "eval_samples_per_second": 10.706,
364
+ "eval_steps_per_second": 1.529,
365
+ "step": 380
366
  },
367
  {
368
+ "epoch": 9.1,
369
+ "step": 380,
370
+ "total_flos": 3.7881039164748595e+18,
371
+ "train_loss": 0.47875460998988467,
372
+ "train_runtime": 960.3946,
373
+ "train_samples_per_second": 3.165,
374
+ "train_steps_per_second": 0.396
375
  },
376
  {
377
+ "epoch": 9.1,
378
+ "eval_accuracy": 0.7857142857142857,
379
+ "eval_loss": 0.785201907157898,
380
+ "eval_runtime": 15.1964,
381
+ "eval_samples_per_second": 1.843,
382
+ "eval_steps_per_second": 0.263,
383
+ "step": 380
384
  },
385
  {
386
+ "epoch": 9.1,
387
+ "eval_accuracy": 0.7857142857142857,
388
+ "eval_loss": 0.7852017283439636,
389
+ "eval_runtime": 2.5877,
390
+ "eval_samples_per_second": 10.821,
391
+ "eval_steps_per_second": 1.546,
392
+ "step": 380
393
  }
394
  ],
395
  "logging_steps": 10,
396
+ "max_steps": 380,
397
  "num_input_tokens_seen": 0,
398
  "num_train_epochs": 9223372036854775807,
399
  "save_steps": 500,
 
409
  "attributes": {}
410
  }
411
  },
412
+ "total_flos": 3.7881039164748595e+18,
413
  "train_batch_size": 8,
414
  "trial_name": null,
415
  "trial_params": null