csikasote committed
Commit c38ed4a
1 Parent(s): 4a81027

End of training

README.md CHANGED
@@ -3,6 +3,9 @@ library_name: transformers
  license: cc-by-nc-4.0
  base_model: facebook/mms-1b-all
  tags:
+ - automatic-speech-recognition
+ - lozgen
+ - mms
  - generated_from_trainer
  metrics:
  - wer
@@ -16,7 +19,7 @@ should probably proofread and complete it, then remove this comment. -->

  # mms-1b-lozgen-male-model

- This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on an unknown dataset.
+ This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the LOZGEN - LOZ dataset.
  It achieves the following results on the evaluation set:
  - Loss: 0.3107
  - Wer: 0.3264
adapter.loz.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:719346a913c210de2e18d64b15a83d455e2a387e395d9384b15885948d0d4718
+ size 8793408
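
The adapter file follows the MMS naming convention (`adapter.<lang_code>.safetensors`), so the checkpoint can be used like any other MMS adapter at inference time. A minimal sketch, assuming the model is published under a repo id matching the checkpoint path (`csikasote/mms-1b-lozgen-male-model`) and that real 16 kHz mono Lozi speech replaces the placeholder input:

```python
import numpy as np
import torch
from transformers import AutoProcessor, Wav2Vec2ForCTC

# Assumed repo id; replace with the actual location of this checkpoint.
model_id = "csikasote/mms-1b-lozgen-male-model"

processor = AutoProcessor.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(model_id)

# adapter.loz.safetensors holds the language-specific adapter weights;
# switching both the tokenizer and the model to "loz" activates them.
processor.tokenizer.set_target_lang("loz")
model.load_adapter("loz")

# Placeholder input: one second of silence at 16 kHz (use real speech in practice).
audio = np.zeros(16_000, dtype=np.float32)
inputs = processor(audio, sampling_rate=16_000, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

pred_ids = torch.argmax(logits, dim=-1)[0]
print(processor.decode(pred_ids))
```
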
all_results.json ADDED
@@ -0,0 +1,15 @@
+ {
+     "epoch": 16.03053435114504,
+     "eval_loss": 0.3107261061668396,
+     "eval_runtime": 20.2286,
+     "eval_samples": 304,
+     "eval_samples_per_second": 15.028,
+     "eval_steps_per_second": 3.757,
+     "eval_wer": 0.3263701155074957,
+     "total_flos": 5.844277956095999e+18,
+     "train_loss": 0.8914755775814964,
+     "train_runtime": 1802.4751,
+     "train_samples": 524,
+     "train_samples_per_second": 8.721,
+     "train_steps_per_second": 2.18
+ }
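
The throughput figures in `all_results.json` are reported by `Trainer` over the full budgeted run (30 epochs, 3930 steps), not the 2100 steps actually completed before training stopped, while the evaluation figures cover one pass over the 304-sample eval set. A quick arithmetic check, with the eval batch size of 4 inferred from `eval_steps_per_second`:

```python
# Consistency check of the throughput numbers in all_results.json.
train_samples, budgeted_epochs, train_runtime = 524, 30, 1802.4751
budgeted_steps = 3930
eval_samples, eval_runtime, eval_batch_size = 304, 20.2286, 4  # batch size inferred

print(train_samples * budgeted_epochs / train_runtime)  # ~8.721  (train_samples_per_second)
print(budgeted_steps / train_runtime)                    # ~2.180  (train_steps_per_second)
print(eval_samples / eval_runtime)                        # ~15.028 (eval_samples_per_second)
print(eval_samples / eval_batch_size / eval_runtime)      # ~3.757  (eval_steps_per_second)
```
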
eval_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+     "epoch": 16.03053435114504,
+     "eval_loss": 0.3107261061668396,
+     "eval_runtime": 20.2286,
+     "eval_samples": 304,
+     "eval_samples_per_second": 15.028,
+     "eval_steps_per_second": 3.757,
+     "eval_wer": 0.3263701155074957
+ }
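
`eval_wer` is a word error rate (substitutions, insertions, and deletions divided by the number of reference words), so 0.3264 corresponds to roughly one word error for every three reference words. A sketch of how such a score is typically computed with the `evaluate` library, using placeholder strings rather than actual model output:

```python
import evaluate

wer_metric = evaluate.load("wer")

# Placeholder transcripts purely for illustration; the reported eval_wer of
# ~0.3264 comes from decoding the 304 evaluation utterances with this model.
predictions = ["this is a predicted transcript"]
references = ["this is the reference transcript"]

print(wer_metric.compute(predictions=predictions, references=references))  # 0.4 here
```
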
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+     "epoch": 16.03053435114504,
+     "total_flos": 5.844277956095999e+18,
+     "train_loss": 0.8914755775814964,
+     "train_runtime": 1802.4751,
+     "train_samples": 524,
+     "train_samples_per_second": 8.721,
+     "train_steps_per_second": 2.18
+ }
trainer_state.json ADDED
@@ -0,0 +1,387 @@
+ {
+   "best_metric": 0.30884110927581787,
+   "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-lozgen-male-model/checkpoint-1800",
+   "epoch": 16.03053435114504,
+   "eval_steps": 100,
+   "global_step": 2100,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.7633587786259542,
+       "grad_norm": 2.278857707977295,
+       "learning_rate": 0.000285,
+       "loss": 6.7665,
+       "step": 100
+     },
+     {
+       "epoch": 0.7633587786259542,
+       "eval_loss": 3.1609649658203125,
+       "eval_runtime": 20.525,
+       "eval_samples_per_second": 14.811,
+       "eval_steps_per_second": 3.703,
+       "eval_wer": 0.9990169574834111,
+       "step": 100
+     },
+     {
+       "epoch": 1.5267175572519083,
+       "grad_norm": 2.781928062438965,
+       "learning_rate": 0.00029255874673629243,
+       "loss": 2.6476,
+       "step": 200
+     },
+     {
+       "epoch": 1.5267175572519083,
+       "eval_loss": 2.2410073280334473,
+       "eval_runtime": 20.3251,
+       "eval_samples_per_second": 14.957,
+       "eval_steps_per_second": 3.739,
+       "eval_wer": 0.8953059719832883,
+       "step": 200
+     },
+     {
+       "epoch": 2.2900763358778624,
+       "grad_norm": 2.653702974319458,
+       "learning_rate": 0.00028472584856396865,
+       "loss": 1.5694,
+       "step": 300
+     },
+     {
+       "epoch": 2.2900763358778624,
+       "eval_loss": 0.5584537386894226,
+       "eval_runtime": 20.2001,
+       "eval_samples_per_second": 15.049,
+       "eval_steps_per_second": 3.762,
+       "eval_wer": 0.7598918653231752,
+       "step": 300
+     },
+     {
+       "epoch": 3.053435114503817,
+       "grad_norm": 1.1257972717285156,
+       "learning_rate": 0.0002768929503916449,
+       "loss": 0.6761,
+       "step": 400
+     },
+     {
+       "epoch": 3.053435114503817,
+       "eval_loss": 0.4565717875957489,
+       "eval_runtime": 20.2697,
+       "eval_samples_per_second": 14.998,
+       "eval_steps_per_second": 3.749,
+       "eval_wer": 0.6318505775374785,
+       "step": 400
+     },
+     {
+       "epoch": 3.816793893129771,
+       "grad_norm": 0.982060968875885,
+       "learning_rate": 0.0002690600522193211,
+       "loss": 0.5793,
+       "step": 500
+     },
+     {
+       "epoch": 3.816793893129771,
+       "eval_loss": 0.4013397693634033,
+       "eval_runtime": 20.4119,
+       "eval_samples_per_second": 14.893,
+       "eval_steps_per_second": 3.723,
+       "eval_wer": 0.4821823543868272,
+       "step": 500
+     },
+     {
+       "epoch": 4.580152671755725,
+       "grad_norm": 0.9821244478225708,
+       "learning_rate": 0.0002612271540469974,
+       "loss": 0.5392,
+       "step": 600
+     },
+     {
+       "epoch": 4.580152671755725,
+       "eval_loss": 0.3795173168182373,
+       "eval_runtime": 20.3646,
+       "eval_samples_per_second": 14.928,
+       "eval_steps_per_second": 3.732,
+       "eval_wer": 0.4553944458097813,
+       "step": 600
+     },
+     {
+       "epoch": 5.34351145038168,
+       "grad_norm": 1.2693768739700317,
+       "learning_rate": 0.0002533942558746736,
+       "loss": 0.4809,
+       "step": 700
+     },
+     {
+       "epoch": 5.34351145038168,
+       "eval_loss": 0.37301722168922424,
+       "eval_runtime": 20.1691,
+       "eval_samples_per_second": 15.073,
+       "eval_steps_per_second": 3.768,
+       "eval_wer": 0.4332759891865323,
+       "step": 700
+     },
+     {
+       "epoch": 6.106870229007634,
+       "grad_norm": 1.0504028797149658,
+       "learning_rate": 0.0002455613577023499,
+       "loss": 0.4813,
+       "step": 800
+     },
+     {
+       "epoch": 6.106870229007634,
+       "eval_loss": 0.35973772406578064,
+       "eval_runtime": 20.1824,
+       "eval_samples_per_second": 15.063,
+       "eval_steps_per_second": 3.766,
+       "eval_wer": 0.4229540427623495,
+       "step": 800
+     },
+     {
+       "epoch": 6.870229007633588,
+       "grad_norm": 1.0751953125,
+       "learning_rate": 0.0002377284595300261,
+       "loss": 0.4484,
+       "step": 900
+     },
+     {
+       "epoch": 6.870229007633588,
+       "eval_loss": 0.3431786596775055,
+       "eval_runtime": 20.3009,
+       "eval_samples_per_second": 14.975,
+       "eval_steps_per_second": 3.744,
+       "eval_wer": 0.39247972474809534,
+       "step": 900
+     },
+     {
+       "epoch": 7.633587786259542,
+       "grad_norm": 3.291668176651001,
+       "learning_rate": 0.00022989556135770233,
+       "loss": 0.4418,
+       "step": 1000
+     },
+     {
+       "epoch": 7.633587786259542,
+       "eval_loss": 0.3390863239765167,
+       "eval_runtime": 20.3166,
+       "eval_samples_per_second": 14.963,
+       "eval_steps_per_second": 3.741,
+       "eval_wer": 0.39469157041042024,
+       "step": 1000
+     },
+     {
+       "epoch": 8.396946564885496,
+       "grad_norm": 1.688865303993225,
+       "learning_rate": 0.00022206266318537858,
+       "loss": 0.4322,
+       "step": 1100
+     },
+     {
+       "epoch": 8.396946564885496,
+       "eval_loss": 0.33392783999443054,
+       "eval_runtime": 20.3067,
+       "eval_samples_per_second": 14.97,
+       "eval_steps_per_second": 3.743,
+       "eval_wer": 0.3841238633570902,
+       "step": 1100
+     },
+     {
+       "epoch": 9.16030534351145,
+       "grad_norm": 1.2780994176864624,
+       "learning_rate": 0.0002142297650130548,
+       "loss": 0.3963,
+       "step": 1200
+     },
+     {
+       "epoch": 9.16030534351145,
+       "eval_loss": 0.3294471502304077,
+       "eval_runtime": 20.282,
+       "eval_samples_per_second": 14.989,
+       "eval_steps_per_second": 3.747,
+       "eval_wer": 0.36692061931678543,
+       "step": 1200
+     },
+     {
+       "epoch": 9.923664122137405,
+       "grad_norm": 1.1817948818206787,
+       "learning_rate": 0.00020639686684073108,
+       "loss": 0.4104,
+       "step": 1300
+     },
+     {
+       "epoch": 9.923664122137405,
+       "eval_loss": 0.321709543466568,
+       "eval_runtime": 20.3324,
+       "eval_samples_per_second": 14.952,
+       "eval_steps_per_second": 3.738,
+       "eval_wer": 0.3634799705087245,
+       "step": 1300
+     },
+     {
+       "epoch": 10.68702290076336,
+       "grad_norm": 2.474210262298584,
+       "learning_rate": 0.0001985639686684073,
+       "loss": 0.3777,
+       "step": 1400
+     },
+     {
+       "epoch": 10.68702290076336,
+       "eval_loss": 0.3176502287387848,
+       "eval_runtime": 20.3222,
+       "eval_samples_per_second": 14.959,
+       "eval_steps_per_second": 3.74,
+       "eval_wer": 0.3610223642172524,
+       "step": 1400
+     },
+     {
+       "epoch": 11.450381679389313,
+       "grad_norm": 1.456008791923523,
+       "learning_rate": 0.00019073107049608353,
+       "loss": 0.3785,
+       "step": 1500
+     },
+     {
+       "epoch": 11.450381679389313,
+       "eval_loss": 0.32359373569488525,
+       "eval_runtime": 20.3296,
+       "eval_samples_per_second": 14.954,
+       "eval_steps_per_second": 3.738,
+       "eval_wer": 0.35389530597198327,
+       "step": 1500
+     },
+     {
+       "epoch": 12.213740458015268,
+       "grad_norm": 1.4315876960754395,
+       "learning_rate": 0.00018289817232375978,
+       "loss": 0.3682,
+       "step": 1600
+     },
+     {
+       "epoch": 12.213740458015268,
+       "eval_loss": 0.31440281867980957,
+       "eval_runtime": 20.1701,
+       "eval_samples_per_second": 15.072,
+       "eval_steps_per_second": 3.768,
+       "eval_wer": 0.34676824772671416,
+       "step": 1600
+     },
+     {
+       "epoch": 12.977099236641221,
+       "grad_norm": 2.2636640071868896,
+       "learning_rate": 0.000175065274151436,
+       "loss": 0.3654,
+       "step": 1700
+     },
+     {
+       "epoch": 12.977099236641221,
+       "eval_loss": 0.31223368644714355,
+       "eval_runtime": 20.3687,
+       "eval_samples_per_second": 14.925,
+       "eval_steps_per_second": 3.731,
+       "eval_wer": 0.35291226345539445,
+       "step": 1700
+     },
+     {
+       "epoch": 13.740458015267176,
+       "grad_norm": 0.9211711883544922,
+       "learning_rate": 0.00016723237597911225,
+       "loss": 0.3509,
+       "step": 1800
+     },
+     {
+       "epoch": 13.740458015267176,
+       "eval_loss": 0.30884110927581787,
+       "eval_runtime": 20.3386,
+       "eval_samples_per_second": 14.947,
+       "eval_steps_per_second": 3.737,
+       "eval_wer": 0.3462767264684198,
+       "step": 1800
+     },
+     {
+       "epoch": 14.50381679389313,
+       "grad_norm": 2.6637320518493652,
+       "learning_rate": 0.0001593994778067885,
+       "loss": 0.3412,
+       "step": 1900
+     },
+     {
+       "epoch": 14.50381679389313,
+       "eval_loss": 0.314583420753479,
+       "eval_runtime": 20.1788,
+       "eval_samples_per_second": 15.065,
+       "eval_steps_per_second": 3.766,
+       "eval_wer": 0.33472597689850087,
+       "step": 1900
+     },
+     {
+       "epoch": 15.267175572519085,
+       "grad_norm": 0.9389367699623108,
+       "learning_rate": 0.00015156657963446475,
+       "loss": 0.3344,
+       "step": 2000
+     },
+     {
+       "epoch": 15.267175572519085,
+       "eval_loss": 0.31076034903526306,
+       "eval_runtime": 20.2164,
+       "eval_samples_per_second": 15.037,
+       "eval_steps_per_second": 3.759,
+       "eval_wer": 0.34160727451462275,
+       "step": 2000
+     },
+     {
+       "epoch": 16.03053435114504,
+       "grad_norm": 0.9784950017929077,
+       "learning_rate": 0.00014373368146214098,
+       "loss": 0.3351,
+       "step": 2100
+     },
+     {
+       "epoch": 16.03053435114504,
+       "eval_loss": 0.3107351064682007,
+       "eval_runtime": 20.3863,
+       "eval_samples_per_second": 14.912,
+       "eval_steps_per_second": 3.728,
+       "eval_wer": 0.3263701155074957,
+       "step": 2100
+     },
+     {
+       "epoch": 16.03053435114504,
+       "step": 2100,
+       "total_flos": 5.844277956095999e+18,
+       "train_loss": 0.8914755775814964,
+       "train_runtime": 1802.4751,
+       "train_samples_per_second": 8.721,
+       "train_steps_per_second": 2.18
+     }
+   ],
+   "logging_steps": 100,
+   "max_steps": 3930,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 30,
+   "save_steps": 400,
+   "stateful_callbacks": {
+     "EarlyStoppingCallback": {
+       "args": {
+         "early_stopping_patience": 3,
+         "early_stopping_threshold": 0.0
+       },
+       "attributes": {
+         "early_stopping_patience_counter": 2
+       }
+     },
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 5.844277956095999e+18,
+   "train_batch_size": 4,
+   "trial_name": null,
+   "trial_params": null
+ }
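
`trainer_state.json` records the schedule behind these numbers: evaluation and logging every 100 steps, checkpoints every 400 steps, a 30-epoch budget (3930 steps at 131 steps per epoch with batch size 4), and an `EarlyStoppingCallback` with patience 3; the run ended at step 2100 with the best eval loss (0.3088) recorded at step 1800. A minimal sketch of matching `TrainingArguments`, assuming these values map one-to-one onto the original training script (which is not part of this commit):

```python
from transformers import TrainingArguments, EarlyStoppingCallback

# Settings consistent with trainer_state.json; the optimizer, warmup and other
# hyperparameters of the original script are not recorded there and are omitted.
args = TrainingArguments(
    output_dir="mms-1b-lozgen-male-model",
    per_device_train_batch_size=4,   # "train_batch_size": 4
    num_train_epochs=30,             # "num_train_epochs": 30 -> max_steps 3930
    eval_strategy="steps",           # named evaluation_strategy in older releases
    eval_steps=100,                  # "eval_steps": 100
    logging_steps=100,               # "logging_steps": 100
    save_steps=400,                  # "save_steps": 400
    load_best_model_at_end=True,     # assumed, for best_model_checkpoint tracking
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

# Stops training once eval_loss fails to improve for 3 consecutive evaluations;
# here the best checkpoint was at step 1800 and the run ended at step 2100.
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=3,
    early_stopping_threshold=0.0,
)
# Both objects would be passed to transformers.Trainer(..., args=args,
# callbacks=[early_stopping]) together with the prepared LOZGEN splits.
```
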