csikasote committed on
Commit 82c0e33
1 Parent(s): fbe0289

End of training

README.md CHANGED
@@ -3,6 +3,9 @@ library_name: transformers
  license: cc-by-nc-4.0
  base_model: facebook/mms-1b-all
  tags:
+ - automatic-speech-recognition
+ - nyagen
+ - mms
  - generated_from_trainer
  metrics:
  - wer
@@ -16,7 +19,7 @@ should probably proofread and complete it, then remove this comment. -->

  # mms-1b-nyagen-male-model

- This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on an unknown dataset.
+ This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the NYAGEN - BEM dataset.
  It achieves the following results on the evaluation set:
  - Loss: 0.1298
  - Wer: 0.1952
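The updated card and tags describe an automatic-speech-recognition fine-tune of facebook/mms-1b-all, with its language adapter stored in the adapter.bem.safetensors file added below ("bem" is the ISO 639-3 code for Bemba). As a rough usage sketch only, assuming the checkpoint follows the standard transformers MMS layout and that the repository id is csikasote/mms-1b-nyagen-male-model (inferred from the committer and model name, not stated in this commit), transcription could look like this:

```python
# Hedged usage sketch; not part of this commit. The repo id and audio file are assumptions.
import torch
import librosa
from transformers import AutoProcessor, Wav2Vec2ForCTC

model_id = "csikasote/mms-1b-nyagen-male-model"  # assumed repository id

processor = AutoProcessor.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(model_id)
model.eval()

# MMS checkpoints expect 16 kHz mono audio.
speech, _ = librosa.load("sample.wav", sr=16_000)
inputs = processor(speech, sampling_rate=16_000, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

pred_ids = torch.argmax(logits, dim=-1)
print(processor.batch_decode(pred_ids)[0])
```

If the checkpoint keeps the base model's multilingual head, the saved bem adapter can instead be activated with `model.load_adapter("bem")` and `processor.tokenizer.set_target_lang("bem")`, following the standard MMS adapter workflow.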
adapter.bem.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ed71cf76df92373a542bd21b91336cfdee644fc61fd826fe0ea89894ed532a98
+ size 8798532
all_results.json ADDED
@@ -0,0 +1,15 @@
+ {
+ "epoch": 5.172413793103448,
+ "eval_loss": 0.12979762256145477,
+ "eval_runtime": 16.3497,
+ "eval_samples": 184,
+ "eval_samples_per_second": 11.254,
+ "eval_steps_per_second": 2.814,
+ "eval_wer": 0.1951987406532861,
+ "total_flos": 7.341751672651536e+18,
+ "train_loss": 0.5011415631430489,
+ "train_runtime": 1836.4834,
+ "train_samples": 1624,
+ "train_samples_per_second": 26.529,
+ "train_steps_per_second": 6.632
+ }
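For reference, the eval_wer of 0.1951987406532861 and eval_loss of 0.12979762256145477 recorded here are the 0.1952 and 0.1298 figures quoted in the model card above. A WER value is conventionally computed as (substitutions + deletions + insertions) / reference words; a minimal sketch with the evaluate library (an assumption — the metric code itself is not part of this commit) looks like:

```python
# Minimal WER sketch using the `evaluate` library (assumed tooling, not from this commit).
import evaluate

wer_metric = evaluate.load("wer")

# Placeholder transcripts; in training these would be decoded predictions vs. references.
predictions = ["this is a sample transcript"]
references = ["this is a sample reference transcript"]

score = wer_metric.compute(predictions=predictions, references=references)
print(f"WER: {score:.4f}")  # 0.1952 means roughly 19.5 word errors per 100 reference words
```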
eval_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "epoch": 5.172413793103448,
+ "eval_loss": 0.12979762256145477,
+ "eval_runtime": 16.3497,
+ "eval_samples": 184,
+ "eval_samples_per_second": 11.254,
+ "eval_steps_per_second": 2.814,
+ "eval_wer": 0.1951987406532861
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "epoch": 5.172413793103448,
+ "total_flos": 7.341751672651536e+18,
+ "train_loss": 0.5011415631430489,
+ "train_runtime": 1836.4834,
+ "train_samples": 1624,
+ "train_samples_per_second": 26.529,
+ "train_steps_per_second": 6.632
+ }
trainer_state.json ADDED
@@ -0,0 +1,387 @@
+ {
+ "best_metric": 0.1275225579738617,
+ "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-nyagen-male-model/checkpoint-1800",
+ "epoch": 5.172413793103448,
+ "eval_steps": 100,
+ "global_step": 2100,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.24630541871921183,
+ "grad_norm": 1.8344100713729858,
+ "learning_rate": 0.00028799999999999995,
+ "loss": 6.345,
+ "step": 100
+ },
+ {
+ "epoch": 0.24630541871921183,
+ "eval_loss": 0.45057007670402527,
+ "eval_runtime": 16.3753,
+ "eval_samples_per_second": 11.236,
+ "eval_steps_per_second": 2.809,
+ "eval_wer": 0.4025974025974026,
+ "step": 100
+ },
+ {
+ "epoch": 0.49261083743842365,
+ "grad_norm": 1.3397903442382812,
+ "learning_rate": 0.0002976158940397351,
+ "loss": 0.353,
+ "step": 200
+ },
+ {
+ "epoch": 0.49261083743842365,
+ "eval_loss": 0.2066199630498886,
+ "eval_runtime": 16.5098,
+ "eval_samples_per_second": 11.145,
+ "eval_steps_per_second": 2.786,
+ "eval_wer": 0.2924045651318379,
+ "step": 200
+ },
+ {
+ "epoch": 0.7389162561576355,
+ "grad_norm": 68.3929672241211,
+ "learning_rate": 0.00029513245033112583,
+ "loss": 0.2989,
+ "step": 300
+ },
+ {
+ "epoch": 0.7389162561576355,
+ "eval_loss": 0.18291255831718445,
+ "eval_runtime": 16.4355,
+ "eval_samples_per_second": 11.195,
+ "eval_steps_per_second": 2.799,
+ "eval_wer": 0.2715466351829988,
+ "step": 300
+ },
+ {
+ "epoch": 0.9852216748768473,
+ "grad_norm": 1.0996179580688477,
+ "learning_rate": 0.0002926490066225165,
+ "loss": 0.2402,
+ "step": 400
+ },
+ {
+ "epoch": 0.9852216748768473,
+ "eval_loss": 0.16742299497127533,
+ "eval_runtime": 16.3654,
+ "eval_samples_per_second": 11.243,
+ "eval_steps_per_second": 2.811,
+ "eval_wer": 0.2546241637150728,
+ "step": 400
+ },
+ {
+ "epoch": 1.2315270935960592,
+ "grad_norm": 0.8030900359153748,
+ "learning_rate": 0.00029016556291390727,
+ "loss": 0.2302,
+ "step": 500
+ },
+ {
+ "epoch": 1.2315270935960592,
+ "eval_loss": 0.15821850299835205,
+ "eval_runtime": 16.4101,
+ "eval_samples_per_second": 11.213,
+ "eval_steps_per_second": 2.803,
+ "eval_wer": 0.24360487996851632,
+ "step": 500
+ },
+ {
+ "epoch": 1.477832512315271,
+ "grad_norm": 0.7671955227851868,
+ "learning_rate": 0.00028768211920529796,
+ "loss": 0.2279,
+ "step": 600
+ },
+ {
+ "epoch": 1.477832512315271,
+ "eval_loss": 0.16019868850708008,
+ "eval_runtime": 16.6478,
+ "eval_samples_per_second": 11.053,
+ "eval_steps_per_second": 2.763,
+ "eval_wer": 0.24281778827233372,
+ "step": 600
+ },
+ {
+ "epoch": 1.7241379310344827,
+ "grad_norm": 0.7692348957061768,
+ "learning_rate": 0.0002851986754966887,
+ "loss": 0.2264,
+ "step": 700
+ },
+ {
+ "epoch": 1.7241379310344827,
+ "eval_loss": 0.15070533752441406,
+ "eval_runtime": 16.3912,
+ "eval_samples_per_second": 11.226,
+ "eval_steps_per_second": 2.806,
+ "eval_wer": 0.22865013774104684,
+ "step": 700
+ },
+ {
+ "epoch": 1.9704433497536946,
+ "grad_norm": 0.5601118206977844,
+ "learning_rate": 0.00028271523178807946,
+ "loss": 0.2276,
+ "step": 800
+ },
+ {
+ "epoch": 1.9704433497536946,
+ "eval_loss": 0.14960433542728424,
+ "eval_runtime": 16.3656,
+ "eval_samples_per_second": 11.243,
+ "eval_steps_per_second": 2.811,
+ "eval_wer": 0.23534041715859896,
+ "step": 800
+ },
+ {
+ "epoch": 2.2167487684729066,
+ "grad_norm": 0.7260679006576538,
+ "learning_rate": 0.00028023178807947015,
+ "loss": 0.2088,
+ "step": 900
+ },
+ {
+ "epoch": 2.2167487684729066,
+ "eval_loss": 0.1460462063550949,
+ "eval_runtime": 16.4242,
+ "eval_samples_per_second": 11.203,
+ "eval_steps_per_second": 2.801,
+ "eval_wer": 0.22077922077922077,
+ "step": 900
+ },
+ {
+ "epoch": 2.4630541871921183,
+ "grad_norm": 0.4174855649471283,
+ "learning_rate": 0.0002777483443708609,
+ "loss": 0.1881,
+ "step": 1000
+ },
+ {
+ "epoch": 2.4630541871921183,
+ "eval_loss": 0.14552360773086548,
+ "eval_runtime": 16.4841,
+ "eval_samples_per_second": 11.162,
+ "eval_steps_per_second": 2.791,
+ "eval_wer": 0.21645021645021645,
+ "step": 1000
+ },
+ {
+ "epoch": 2.70935960591133,
+ "grad_norm": 2.906888484954834,
+ "learning_rate": 0.00027526490066225165,
+ "loss": 0.2079,
+ "step": 1100
+ },
+ {
+ "epoch": 2.70935960591133,
+ "eval_loss": 0.14178700745105743,
+ "eval_runtime": 16.3484,
+ "eval_samples_per_second": 11.255,
+ "eval_steps_per_second": 2.814,
+ "eval_wer": 0.21684376229830776,
+ "step": 1100
+ },
+ {
+ "epoch": 2.955665024630542,
+ "grad_norm": 8.858180046081543,
+ "learning_rate": 0.0002727814569536424,
+ "loss": 0.196,
+ "step": 1200
+ },
+ {
+ "epoch": 2.955665024630542,
+ "eval_loss": 0.140374094247818,
+ "eval_runtime": 16.4414,
+ "eval_samples_per_second": 11.191,
+ "eval_steps_per_second": 2.798,
+ "eval_wer": 0.2085792994883904,
+ "step": 1200
+ },
+ {
+ "epoch": 3.2019704433497536,
+ "grad_norm": 0.8277586102485657,
+ "learning_rate": 0.0002702980132450331,
+ "loss": 0.1782,
+ "step": 1300
+ },
+ {
+ "epoch": 3.2019704433497536,
+ "eval_loss": 0.13734222948551178,
+ "eval_runtime": 16.5713,
+ "eval_samples_per_second": 11.104,
+ "eval_steps_per_second": 2.776,
+ "eval_wer": 0.2077922077922078,
+ "step": 1300
+ },
+ {
+ "epoch": 3.4482758620689653,
+ "grad_norm": 0.6015973091125488,
+ "learning_rate": 0.0002678145695364238,
+ "loss": 0.1741,
+ "step": 1400
+ },
+ {
+ "epoch": 3.4482758620689653,
+ "eval_loss": 0.13425125181674957,
+ "eval_runtime": 16.5286,
+ "eval_samples_per_second": 11.132,
+ "eval_steps_per_second": 2.783,
+ "eval_wer": 0.1944116489571035,
+ "step": 1400
+ },
+ {
+ "epoch": 3.6945812807881775,
+ "grad_norm": 0.8872548341751099,
+ "learning_rate": 0.00026533112582781453,
+ "loss": 0.1948,
+ "step": 1500
+ },
+ {
+ "epoch": 3.6945812807881775,
+ "eval_loss": 0.1318366825580597,
+ "eval_runtime": 16.3284,
+ "eval_samples_per_second": 11.269,
+ "eval_steps_per_second": 2.817,
+ "eval_wer": 0.21369539551357733,
+ "step": 1500
+ },
+ {
+ "epoch": 3.9408866995073892,
+ "grad_norm": 1.193844199180603,
+ "learning_rate": 0.0002628476821192053,
+ "loss": 0.1904,
+ "step": 1600
+ },
+ {
+ "epoch": 3.9408866995073892,
+ "eval_loss": 0.13069555163383484,
+ "eval_runtime": 16.3618,
+ "eval_samples_per_second": 11.246,
+ "eval_steps_per_second": 2.811,
+ "eval_wer": 0.20425029515938606,
+ "step": 1600
+ },
+ {
+ "epoch": 4.187192118226601,
+ "grad_norm": 0.6119738221168518,
+ "learning_rate": 0.000260364238410596,
+ "loss": 0.1762,
+ "step": 1700
+ },
+ {
+ "epoch": 4.187192118226601,
+ "eval_loss": 0.13134504854679108,
+ "eval_runtime": 16.468,
+ "eval_samples_per_second": 11.173,
+ "eval_steps_per_second": 2.793,
+ "eval_wer": 0.20031483667847305,
+ "step": 1700
+ },
+ {
+ "epoch": 4.433497536945813,
+ "grad_norm": 0.7769166827201843,
+ "learning_rate": 0.0002578807947019867,
+ "loss": 0.1718,
+ "step": 1800
+ },
+ {
+ "epoch": 4.433497536945813,
+ "eval_loss": 0.1275225579738617,
+ "eval_runtime": 16.422,
+ "eval_samples_per_second": 11.204,
+ "eval_steps_per_second": 2.801,
+ "eval_wer": 0.19323101141282958,
+ "step": 1800
+ },
+ {
+ "epoch": 4.679802955665025,
+ "grad_norm": 0.7259223461151123,
+ "learning_rate": 0.00025539735099337746,
+ "loss": 0.1595,
+ "step": 1900
+ },
+ {
+ "epoch": 4.679802955665025,
+ "eval_loss": 0.1275661438703537,
+ "eval_runtime": 16.3823,
+ "eval_samples_per_second": 11.232,
+ "eval_steps_per_second": 2.808,
+ "eval_wer": 0.1951987406532861,
+ "step": 1900
+ },
+ {
+ "epoch": 4.926108374384237,
+ "grad_norm": 0.5679446458816528,
+ "learning_rate": 0.0002529139072847682,
+ "loss": 0.1811,
+ "step": 2000
+ },
+ {
+ "epoch": 4.926108374384237,
+ "eval_loss": 0.12904822826385498,
+ "eval_runtime": 16.3388,
+ "eval_samples_per_second": 11.262,
+ "eval_steps_per_second": 2.815,
+ "eval_wer": 0.19834710743801653,
+ "step": 2000
+ },
+ {
+ "epoch": 5.172413793103448,
+ "grad_norm": 0.6480135917663574,
+ "learning_rate": 0.00025043046357615896,
+ "loss": 0.1477,
+ "step": 2100
+ },
+ {
+ "epoch": 5.172413793103448,
+ "eval_loss": 0.12981154024600983,
+ "eval_runtime": 16.4531,
+ "eval_samples_per_second": 11.183,
+ "eval_steps_per_second": 2.796,
+ "eval_wer": 0.1951987406532861,
+ "step": 2100
+ },
+ {
+ "epoch": 5.172413793103448,
+ "step": 2100,
+ "total_flos": 7.341751672651536e+18,
+ "train_loss": 0.5011415631430489,
+ "train_runtime": 1836.4834,
+ "train_samples_per_second": 26.529,
+ "train_steps_per_second": 6.632
+ }
+ ],
+ "logging_steps": 100,
+ "max_steps": 12180,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 30,
+ "save_steps": 400,
+ "stateful_callbacks": {
+ "EarlyStoppingCallback": {
+ "args": {
+ "early_stopping_patience": 3,
+ "early_stopping_threshold": 0.0
+ },
+ "attributes": {
+ "early_stopping_patience_counter": 2
+ }
+ },
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 7.341751672651536e+18,
+ "train_batch_size": 4,
+ "trial_name": null,
+ "trial_params": null
+ }
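The state above pins down most of the run configuration: per-device train batch size 4, evaluation and logging every 100 steps, checkpoints every 400 steps, 30 epochs requested (max_steps 12180), training ending at step 2100 under an EarlyStoppingCallback with patience 3, and the best checkpoint selected on eval_loss (0.1275 at step 1800). Below is a hedged reconstruction of the corresponding TrainingArguments; the learning rate and warmup are only inferred from the logged schedule (peak near 3e-4 with a roughly 100-step warmup and linear decay), and the model, datasets, and data collator are omitted because they are not part of this commit.

```python
# Reconstruction sketch of the Trainer configuration implied by trainer_state.json.
# Inferred/assumed values are marked; this is not the repository's actual training script.
from transformers import TrainingArguments, EarlyStoppingCallback

training_args = TrainingArguments(
    output_dir="mms-1b-nyagen-male-model",
    per_device_train_batch_size=4,      # "train_batch_size": 4
    num_train_epochs=30,                # "num_train_epochs": 30 (max_steps 12180 = 30 * 406)
    learning_rate=3e-4,                 # inferred from the logged learning-rate schedule
    warmup_steps=100,                   # inferred: lr is still ramping up at step 100
    eval_strategy="steps",              # "eval_steps": 100 (evaluation_strategy on older transformers)
    eval_steps=100,
    logging_steps=100,                  # "logging_steps": 100
    save_steps=400,                     # "save_steps": 400
    load_best_model_at_end=True,        # needed for best_model_checkpoint / early stopping
    metric_for_best_model="eval_loss",  # "best_metric" tracks the lowest eval_loss
    greater_is_better=False,
)

# Matches the recorded EarlyStoppingCallback args (patience 3, threshold 0.0).
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=3,
    early_stopping_threshold=0.0,
)

# These would then be passed to Trainer(model=..., args=training_args,
# train_dataset=..., eval_dataset=..., callbacks=[early_stopping], ...).
```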