nouamanetazi HF staff committed on
Commit
ac5fb9c
1 Parent(s): 16c3cc2

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +10 -10
  2. eval_results.json +5 -5
  3. train_results.json +6 -6
  4. trainer_state.json +260 -110
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_loss": 26.253612518310547,
4
- "eval_runtime": 5.982,
5
  "eval_samples": 128,
6
- "eval_samples_per_second": 21.397,
7
- "eval_steps_per_second": 0.334,
8
  "eval_wer": 1.0,
9
- "total_flos": 1.3476444758728704e+17,
10
- "train_loss": 16.66825189590454,
11
- "train_runtime": 91.9274,
12
  "train_samples": 128,
13
- "train_samples_per_second": 6.962,
14
- "train_steps_per_second": 0.109
15
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "eval_loss": 6.937458515167236,
4
+ "eval_runtime": 5.7217,
5
  "eval_samples": 128,
6
+ "eval_samples_per_second": 22.371,
7
+ "eval_steps_per_second": 0.35,
8
  "eval_wer": 1.0,
9
+ "total_flos": 5.430583918308557e+17,
10
+ "train_loss": 8.69529299736023,
11
+ "train_runtime": 243.8197,
12
  "train_samples": 128,
13
+ "train_samples_per_second": 10.5,
14
+ "train_steps_per_second": 0.164
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_loss": 26.253612518310547,
4
- "eval_runtime": 5.982,
5
  "eval_samples": 128,
6
- "eval_samples_per_second": 21.397,
7
- "eval_steps_per_second": 0.334,
8
  "eval_wer": 1.0
9
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "eval_loss": 6.937458515167236,
4
+ "eval_runtime": 5.7217,
5
  "eval_samples": 128,
6
+ "eval_samples_per_second": 22.371,
7
+ "eval_steps_per_second": 0.35,
8
  "eval_wer": 1.0
9
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 5.0,
3
- "total_flos": 1.3476444758728704e+17,
4
- "train_loss": 16.66825189590454,
5
- "train_runtime": 91.9274,
6
  "train_samples": 128,
7
- "train_samples_per_second": 6.962,
8
- "train_steps_per_second": 0.109
9
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "total_flos": 5.430583918308557e+17,
4
+ "train_loss": 8.69529299736023,
5
+ "train_runtime": 243.8197,
6
  "train_samples": 128,
7
+ "train_samples_per_second": 10.5,
8
+ "train_steps_per_second": 0.164
9
  }
trainer_state.json CHANGED
@@ -1,175 +1,325 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.0,
5
- "global_step": 10,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.5,
12
  "learning_rate": 7.5e-05,
13
- "loss": 18.7839,
14
- "step": 1
15
  },
16
  {
17
- "epoch": 0.5,
18
  "eval_loss": 32.55470657348633,
19
- "eval_runtime": 5.5933,
20
- "eval_samples_per_second": 22.884,
21
- "eval_steps_per_second": 0.358,
22
  "eval_wer": 1.0,
23
- "step": 1
24
  },
25
  {
26
- "epoch": 1.0,
27
- "learning_rate": 7.5e-05,
28
- "loss": 19.4045,
29
- "step": 2
30
  },
31
  {
32
- "epoch": 1.0,
33
- "eval_loss": 32.55470657348633,
34
- "eval_runtime": 5.6123,
35
- "eval_samples_per_second": 22.807,
36
- "eval_steps_per_second": 0.356,
37
  "eval_wer": 1.0,
38
- "step": 2
39
  },
40
  {
41
- "epoch": 1.5,
42
- "learning_rate": 6.75e-05,
43
- "loss": 19.6279,
44
- "step": 3
45
  },
46
  {
47
- "epoch": 1.5,
48
- "eval_loss": 31.884950637817383,
49
- "eval_runtime": 5.4543,
50
- "eval_samples_per_second": 23.468,
51
- "eval_steps_per_second": 0.367,
52
  "eval_wer": 1.0,
53
- "step": 3
54
  },
55
  {
56
- "epoch": 2.0,
57
- "learning_rate": 5.9999999999999995e-05,
58
- "loss": 17.7216,
59
- "step": 4
60
  },
61
  {
62
- "epoch": 2.0,
63
- "eval_loss": 31.10820960998535,
64
- "eval_runtime": 5.4982,
65
- "eval_samples_per_second": 23.28,
66
- "eval_steps_per_second": 0.364,
67
  "eval_wer": 1.0,
68
- "step": 4
69
  },
70
  {
71
- "epoch": 2.5,
72
- "learning_rate": 5.9999999999999995e-05,
73
- "loss": 17.3103,
74
- "step": 5
75
  },
76
  {
77
- "epoch": 2.5,
78
- "eval_loss": 31.10820960998535,
79
- "eval_runtime": 5.9789,
80
- "eval_samples_per_second": 21.409,
81
- "eval_steps_per_second": 0.335,
82
  "eval_wer": 1.0,
83
- "step": 5
84
  },
85
  {
86
- "epoch": 3.0,
87
- "learning_rate": 5.2499999999999995e-05,
88
- "loss": 17.2026,
89
- "step": 6
90
  },
91
  {
92
- "epoch": 3.0,
93
- "eval_loss": 29.966575622558594,
94
- "eval_runtime": 5.3861,
95
- "eval_samples_per_second": 23.765,
96
- "eval_steps_per_second": 0.371,
97
  "eval_wer": 1.0,
98
- "step": 6
99
  },
100
  {
101
- "epoch": 3.5,
102
- "learning_rate": 4.4999999999999996e-05,
103
- "loss": 16.7911,
104
- "step": 7
105
  },
106
  {
107
- "epoch": 3.5,
108
- "eval_loss": 28.54442596435547,
109
- "eval_runtime": 5.434,
110
- "eval_samples_per_second": 23.555,
111
- "eval_steps_per_second": 0.368,
112
  "eval_wer": 1.0,
113
- "step": 7
114
  },
115
  {
116
- "epoch": 4.0,
117
- "learning_rate": 3.75e-05,
118
- "loss": 13.7364,
119
- "step": 8
120
  },
121
  {
122
- "epoch": 4.0,
123
- "eval_loss": 25.79315185546875,
124
- "eval_runtime": 5.4455,
125
- "eval_samples_per_second": 23.506,
126
- "eval_steps_per_second": 0.367,
127
  "eval_wer": 1.0,
128
- "step": 8
129
  },
130
  {
131
- "epoch": 4.5,
132
- "learning_rate": 2.9999999999999997e-05,
133
- "loss": 13.1602,
134
- "step": 9
135
  },
136
  {
137
- "epoch": 4.5,
138
- "eval_loss": 23.46338653564453,
139
- "eval_runtime": 5.3714,
140
- "eval_samples_per_second": 23.83,
141
- "eval_steps_per_second": 0.372,
142
  "eval_wer": 1.0,
143
- "step": 9
144
  },
145
  {
146
- "epoch": 5.0,
147
- "learning_rate": 2.2499999999999998e-05,
148
- "loss": 12.9439,
149
- "step": 10
150
  },
151
  {
152
- "epoch": 5.0,
153
- "eval_loss": 21.93878936767578,
154
- "eval_runtime": 5.4116,
155
- "eval_samples_per_second": 23.653,
156
- "eval_steps_per_second": 0.37,
157
  "eval_wer": 1.0,
158
- "step": 10
159
  },
160
  {
161
- "epoch": 5.0,
162
- "step": 10,
163
- "total_flos": 1.3476444758728704e+17,
164
- "train_loss": 16.66825189590454,
165
- "train_runtime": 91.9274,
166
- "train_samples_per_second": 6.962,
167
- "train_steps_per_second": 0.109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  }
169
  ],
170
- "max_steps": 10,
171
- "num_train_epochs": 5,
172
- "total_flos": 1.3476444758728704e+17,
173
  "trial_name": null,
174
  "trial_params": null
175
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.0,
5
+ "global_step": 40,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.0,
12
  "learning_rate": 7.5e-05,
13
+ "loss": 19.0933,
14
+ "step": 2
15
  },
16
  {
17
+ "epoch": 1.0,
18
  "eval_loss": 32.55470657348633,
19
+ "eval_runtime": 5.3015,
20
+ "eval_samples_per_second": 24.144,
21
+ "eval_steps_per_second": 0.377,
22
  "eval_wer": 1.0,
23
+ "step": 2
24
  },
25
  {
26
+ "epoch": 2.0,
27
+ "learning_rate": 7.125e-05,
28
+ "loss": 18.6757,
29
+ "step": 4
30
  },
31
  {
32
+ "epoch": 2.0,
33
+ "eval_loss": 31.040802001953125,
34
+ "eval_runtime": 5.2939,
35
+ "eval_samples_per_second": 24.179,
36
+ "eval_steps_per_second": 0.378,
37
  "eval_wer": 1.0,
38
+ "step": 4
39
  },
40
  {
41
+ "epoch": 3.0,
42
+ "learning_rate": 6.937499999999999e-05,
43
+ "loss": 17.1649,
44
+ "step": 6
45
  },
46
  {
47
+ "epoch": 3.0,
48
+ "eval_loss": 29.57763671875,
49
+ "eval_runtime": 5.2818,
50
+ "eval_samples_per_second": 24.234,
51
+ "eval_steps_per_second": 0.379,
52
  "eval_wer": 1.0,
53
+ "step": 6
54
  },
55
  {
56
+ "epoch": 4.0,
57
+ "learning_rate": 6.5625e-05,
58
+ "loss": 14.7415,
59
+ "step": 8
60
  },
61
  {
62
+ "epoch": 4.0,
63
+ "eval_loss": 22.918315887451172,
64
+ "eval_runtime": 5.4291,
65
+ "eval_samples_per_second": 23.577,
66
+ "eval_steps_per_second": 0.368,
67
  "eval_wer": 1.0,
68
+ "step": 8
69
  },
70
  {
71
+ "epoch": 5.0,
72
+ "learning_rate": 6.187499999999999e-05,
73
+ "loss": 11.8071,
74
+ "step": 10
75
  },
76
  {
77
+ "epoch": 5.0,
78
+ "eval_loss": 17.507305145263672,
79
+ "eval_runtime": 5.3146,
80
+ "eval_samples_per_second": 24.085,
81
+ "eval_steps_per_second": 0.376,
82
  "eval_wer": 1.0,
83
+ "step": 10
84
  },
85
  {
86
+ "epoch": 6.0,
87
+ "learning_rate": 5.8124999999999997e-05,
88
+ "loss": 9.7675,
89
+ "step": 12
90
  },
91
  {
92
+ "epoch": 6.0,
93
+ "eval_loss": 14.17426872253418,
94
+ "eval_runtime": 5.4054,
95
+ "eval_samples_per_second": 23.68,
96
+ "eval_steps_per_second": 0.37,
97
  "eval_wer": 1.0,
98
+ "step": 12
99
  },
100
  {
101
+ "epoch": 7.0,
102
+ "learning_rate": 5.4374999999999994e-05,
103
+ "loss": 8.4193,
104
+ "step": 14
105
  },
106
  {
107
+ "epoch": 7.0,
108
+ "eval_loss": 12.122542381286621,
109
+ "eval_runtime": 5.2782,
110
+ "eval_samples_per_second": 24.251,
111
+ "eval_steps_per_second": 0.379,
112
  "eval_wer": 1.0,
113
+ "step": 14
114
  },
115
  {
116
+ "epoch": 8.0,
117
+ "learning_rate": 5.0625e-05,
118
+ "loss": 7.4746,
119
+ "step": 16
120
  },
121
  {
122
+ "epoch": 8.0,
123
+ "eval_loss": 10.744585037231445,
124
+ "eval_runtime": 5.3374,
125
+ "eval_samples_per_second": 23.982,
126
+ "eval_steps_per_second": 0.375,
127
  "eval_wer": 1.0,
128
+ "step": 16
129
  },
130
  {
131
+ "epoch": 9.0,
132
+ "learning_rate": 4.6874999999999994e-05,
133
+ "loss": 6.8442,
134
+ "step": 18
135
  },
136
  {
137
+ "epoch": 9.0,
138
+ "eval_loss": 9.794867515563965,
139
+ "eval_runtime": 5.2577,
140
+ "eval_samples_per_second": 24.345,
141
+ "eval_steps_per_second": 0.38,
142
  "eval_wer": 1.0,
143
+ "step": 18
144
  },
145
  {
146
+ "epoch": 10.0,
147
+ "learning_rate": 4.312499999999999e-05,
148
+ "loss": 6.3765,
149
+ "step": 20
150
  },
151
  {
152
+ "epoch": 10.0,
153
+ "eval_loss": 9.114768981933594,
154
+ "eval_runtime": 5.2827,
155
+ "eval_samples_per_second": 24.23,
156
+ "eval_steps_per_second": 0.379,
157
  "eval_wer": 1.0,
158
+ "step": 20
159
  },
160
  {
161
+ "epoch": 11.0,
162
+ "learning_rate": 3.9374999999999995e-05,
163
+ "loss": 6.0321,
164
+ "step": 22
165
+ },
166
+ {
167
+ "epoch": 11.0,
168
+ "eval_loss": 8.59341812133789,
169
+ "eval_runtime": 5.2171,
170
+ "eval_samples_per_second": 24.535,
171
+ "eval_steps_per_second": 0.383,
172
+ "eval_wer": 1.0,
173
+ "step": 22
174
+ },
175
+ {
176
+ "epoch": 12.0,
177
+ "learning_rate": 3.5625e-05,
178
+ "loss": 5.7783,
179
+ "step": 24
180
+ },
181
+ {
182
+ "epoch": 12.0,
183
+ "eval_loss": 8.198293685913086,
184
+ "eval_runtime": 5.3204,
185
+ "eval_samples_per_second": 24.058,
186
+ "eval_steps_per_second": 0.376,
187
+ "eval_wer": 1.0,
188
+ "step": 24
189
+ },
190
+ {
191
+ "epoch": 13.0,
192
+ "learning_rate": 3.1874999999999996e-05,
193
+ "loss": 5.5827,
194
+ "step": 26
195
+ },
196
+ {
197
+ "epoch": 13.0,
198
+ "eval_loss": 7.877962112426758,
199
+ "eval_runtime": 5.2188,
200
+ "eval_samples_per_second": 24.527,
201
+ "eval_steps_per_second": 0.383,
202
+ "eval_wer": 1.0,
203
+ "step": 26
204
+ },
205
+ {
206
+ "epoch": 14.0,
207
+ "learning_rate": 2.8125e-05,
208
+ "loss": 5.4249,
209
+ "step": 28
210
+ },
211
+ {
212
+ "epoch": 14.0,
213
+ "eval_loss": 7.628803730010986,
214
+ "eval_runtime": 5.2811,
215
+ "eval_samples_per_second": 24.238,
216
+ "eval_steps_per_second": 0.379,
217
+ "eval_wer": 1.0,
218
+ "step": 28
219
+ },
220
+ {
221
+ "epoch": 15.0,
222
+ "learning_rate": 2.4375e-05,
223
+ "loss": 5.3088,
224
+ "step": 30
225
+ },
226
+ {
227
+ "epoch": 15.0,
228
+ "eval_loss": 7.427917003631592,
229
+ "eval_runtime": 5.236,
230
+ "eval_samples_per_second": 24.446,
231
+ "eval_steps_per_second": 0.382,
232
+ "eval_wer": 1.0,
233
+ "step": 30
234
+ },
235
+ {
236
+ "epoch": 16.0,
237
+ "learning_rate": 2.0625e-05,
238
+ "loss": 5.2078,
239
+ "step": 32
240
+ },
241
+ {
242
+ "epoch": 16.0,
243
+ "eval_loss": 7.268764972686768,
244
+ "eval_runtime": 5.2941,
245
+ "eval_samples_per_second": 24.178,
246
+ "eval_steps_per_second": 0.378,
247
+ "eval_wer": 1.0,
248
+ "step": 32
249
+ },
250
+ {
251
+ "epoch": 17.0,
252
+ "learning_rate": 1.6875e-05,
253
+ "loss": 5.1289,
254
+ "step": 34
255
+ },
256
+ {
257
+ "epoch": 17.0,
258
+ "eval_loss": 7.145933628082275,
259
+ "eval_runtime": 5.2685,
260
+ "eval_samples_per_second": 24.295,
261
+ "eval_steps_per_second": 0.38,
262
+ "eval_wer": 1.0,
263
+ "step": 34
264
+ },
265
+ {
266
+ "epoch": 18.0,
267
+ "learning_rate": 1.3124999999999999e-05,
268
+ "loss": 5.0697,
269
+ "step": 36
270
+ },
271
+ {
272
+ "epoch": 18.0,
273
+ "eval_loss": 7.052780628204346,
274
+ "eval_runtime": 5.263,
275
+ "eval_samples_per_second": 24.321,
276
+ "eval_steps_per_second": 0.38,
277
+ "eval_wer": 1.0,
278
+ "step": 36
279
+ },
280
+ {
281
+ "epoch": 19.0,
282
+ "learning_rate": 9.375e-06,
283
+ "loss": 5.0227,
284
+ "step": 38
285
+ },
286
+ {
287
+ "epoch": 19.0,
288
+ "eval_loss": 6.983470916748047,
289
+ "eval_runtime": 5.2829,
290
+ "eval_samples_per_second": 24.229,
291
+ "eval_steps_per_second": 0.379,
292
+ "eval_wer": 1.0,
293
+ "step": 38
294
+ },
295
+ {
296
+ "epoch": 20.0,
297
+ "learning_rate": 5.6249999999999995e-06,
298
+ "loss": 4.9853,
299
+ "step": 40
300
+ },
301
+ {
302
+ "epoch": 20.0,
303
+ "eval_loss": 6.937458515167236,
304
+ "eval_runtime": 5.3165,
305
+ "eval_samples_per_second": 24.076,
306
+ "eval_steps_per_second": 0.376,
307
+ "eval_wer": 1.0,
308
+ "step": 40
309
+ },
310
+ {
311
+ "epoch": 20.0,
312
+ "step": 40,
313
+ "total_flos": 5.430583918308557e+17,
314
+ "train_loss": 8.69529299736023,
315
+ "train_runtime": 243.8197,
316
+ "train_samples_per_second": 10.5,
317
+ "train_steps_per_second": 0.164
318
  }
319
  ],
320
+ "max_steps": 40,
321
+ "num_train_epochs": 20,
322
+ "total_flos": 5.430583918308557e+17,
323
  "trial_name": null,
324
  "trial_params": null
325
  }