kmknair commited on
Commit
9a57891
1 Parent(s): 7a643c0
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 57, "</s>": 58}
config.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-xls-r-300m",
3
+ "activation_dropout": 0.0,
4
+ "apply_spec_augment": true,
5
+ "architectures": [
6
+ "Wav2Vec2ForCTC"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "codevector_dim": 768,
12
+ "contrastive_logits_temperature": 0.1,
13
+ "conv_bias": true,
14
+ "conv_dim": [
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512
22
+ ],
23
+ "conv_kernel": [
24
+ 10,
25
+ 3,
26
+ 3,
27
+ 3,
28
+ 3,
29
+ 2,
30
+ 2
31
+ ],
32
+ "conv_stride": [
33
+ 5,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2,
38
+ 2,
39
+ 2
40
+ ],
41
+ "ctc_loss_reduction": "mean",
42
+ "ctc_zero_infinity": false,
43
+ "diversity_loss_weight": 0.1,
44
+ "do_stable_layer_norm": true,
45
+ "eos_token_id": 2,
46
+ "feat_extract_activation": "gelu",
47
+ "feat_extract_dropout": 0.0,
48
+ "feat_extract_norm": "layer",
49
+ "feat_proj_dropout": 0.0,
50
+ "feat_quantizer_dropout": 0.0,
51
+ "final_dropout": 0.0,
52
+ "gradient_checkpointing": false,
53
+ "hidden_act": "gelu",
54
+ "hidden_dropout": 0.0,
55
+ "hidden_size": 1024,
56
+ "initializer_range": 0.02,
57
+ "intermediate_size": 4096,
58
+ "layer_norm_eps": 1e-05,
59
+ "layerdrop": 0.0,
60
+ "mask_feature_length": 10,
61
+ "mask_feature_prob": 0.0,
62
+ "mask_time_length": 10,
63
+ "mask_time_prob": 0.05,
64
+ "model_type": "wav2vec2",
65
+ "num_attention_heads": 16,
66
+ "num_codevector_groups": 2,
67
+ "num_codevectors_per_group": 320,
68
+ "num_conv_pos_embedding_groups": 16,
69
+ "num_conv_pos_embeddings": 128,
70
+ "num_feat_extract_layers": 7,
71
+ "num_hidden_layers": 24,
72
+ "num_negatives": 100,
73
+ "pad_token_id": 56,
74
+ "proj_codevector_dim": 768,
75
+ "torch_dtype": "float32",
76
+ "transformers_version": "4.11.3",
77
+ "use_weighted_layer_sum": false,
78
+ "vocab_size": 59
79
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df322f29efb40c4b9aaca06163c4400e11a966706c988c8420cf5b7eb76f7ca7
3
+ size 2490542737
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40654ac8eade44526911720d8b5ffc0ac1f26e80d60b111c9277e72875eb7ee6
3
+ size 1262165553
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13ba5ad3d7125d7ed0e89ccc945b569066774d7a233a162b9850a0d5ff7c2203
3
+ size 14567
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2be64a1fdf6be02c40ff882716106a4a35ae10386dde590d5a8422b2a617460e
3
+ size 559
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf1474db9da7292cef06a0c8fffdb14e50ead1b7400ea711fb843bc70078edc
3
+ size 623
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./wav2vec2-large-xls-r-300m-arabic-colab", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
trainer_state.json ADDED
@@ -0,0 +1,766 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 29.454746136865342,
5
+ "global_step": 20000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.59,
12
+ "learning_rate": 0.0002382,
13
+ "loss": 6.1211,
14
+ "step": 400
15
+ },
16
+ {
17
+ "epoch": 0.59,
18
+ "eval_loss": 3.2328364849090576,
19
+ "eval_runtime": 546.0856,
20
+ "eval_samples_per_second": 13.958,
21
+ "eval_steps_per_second": 1.745,
22
+ "eval_wer": 0.9991302123305194,
23
+ "step": 400
24
+ },
25
+ {
26
+ "epoch": 1.18,
27
+ "learning_rate": 0.0002955158530447911,
28
+ "loss": 2.304,
29
+ "step": 800
30
+ },
31
+ {
32
+ "epoch": 1.18,
33
+ "eval_loss": 1.0820916891098022,
34
+ "eval_runtime": 551.5198,
35
+ "eval_samples_per_second": 13.82,
36
+ "eval_steps_per_second": 1.728,
37
+ "eval_wer": 0.8043745203376823,
38
+ "step": 800
39
+ },
40
+ {
41
+ "epoch": 1.77,
42
+ "learning_rate": 0.0002894765978862607,
43
+ "loss": 0.9673,
44
+ "step": 1200
45
+ },
46
+ {
47
+ "epoch": 1.77,
48
+ "eval_loss": 0.7271230816841125,
49
+ "eval_runtime": 547.9451,
50
+ "eval_samples_per_second": 13.91,
51
+ "eval_steps_per_second": 1.739,
52
+ "eval_wer": 0.6917881811204911,
53
+ "step": 1200
54
+ },
55
+ {
56
+ "epoch": 2.36,
57
+ "learning_rate": 0.0002834373427277302,
58
+ "loss": 0.7607,
59
+ "step": 1600
60
+ },
61
+ {
62
+ "epoch": 2.36,
63
+ "eval_loss": 0.6257076263427734,
64
+ "eval_runtime": 545.1866,
65
+ "eval_samples_per_second": 13.981,
66
+ "eval_steps_per_second": 1.748,
67
+ "eval_wer": 0.6329751854694295,
68
+ "step": 1600
69
+ },
70
+ {
71
+ "epoch": 2.94,
72
+ "learning_rate": 0.00027739808756919976,
73
+ "loss": 0.689,
74
+ "step": 2000
75
+ },
76
+ {
77
+ "epoch": 2.94,
78
+ "eval_loss": 0.5595377683639526,
79
+ "eval_runtime": 545.7121,
80
+ "eval_samples_per_second": 13.967,
81
+ "eval_steps_per_second": 1.746,
82
+ "eval_wer": 0.6035303146584804,
83
+ "step": 2000
84
+ },
85
+ {
86
+ "epoch": 3.53,
87
+ "learning_rate": 0.00027135883241066934,
88
+ "loss": 0.5775,
89
+ "step": 2400
90
+ },
91
+ {
92
+ "epoch": 3.53,
93
+ "eval_loss": 0.582660973072052,
94
+ "eval_runtime": 547.0999,
95
+ "eval_samples_per_second": 13.932,
96
+ "eval_steps_per_second": 1.742,
97
+ "eval_wer": 0.6055257099002302,
98
+ "step": 2400
99
+ },
100
+ {
101
+ "epoch": 4.12,
102
+ "learning_rate": 0.00026531957725213886,
103
+ "loss": 0.5621,
104
+ "step": 2800
105
+ },
106
+ {
107
+ "epoch": 4.12,
108
+ "eval_loss": 0.5549562573432922,
109
+ "eval_runtime": 546.696,
110
+ "eval_samples_per_second": 13.942,
111
+ "eval_steps_per_second": 1.743,
112
+ "eval_wer": 0.5691992837042722,
113
+ "step": 2800
114
+ },
115
+ {
116
+ "epoch": 4.71,
117
+ "learning_rate": 0.00025928032209360843,
118
+ "loss": 0.5014,
119
+ "step": 3200
120
+ },
121
+ {
122
+ "epoch": 4.71,
123
+ "eval_loss": 0.5490128993988037,
124
+ "eval_runtime": 542.4177,
125
+ "eval_samples_per_second": 14.052,
126
+ "eval_steps_per_second": 1.757,
127
+ "eval_wer": 0.5637503197748784,
128
+ "step": 3200
129
+ },
130
+ {
131
+ "epoch": 5.3,
132
+ "learning_rate": 0.000253241066935078,
133
+ "loss": 0.4781,
134
+ "step": 3600
135
+ },
136
+ {
137
+ "epoch": 5.3,
138
+ "eval_loss": 0.5757995247840881,
139
+ "eval_runtime": 543.8318,
140
+ "eval_samples_per_second": 14.015,
141
+ "eval_steps_per_second": 1.752,
142
+ "eval_wer": 0.5655154771041186,
143
+ "step": 3600
144
+ },
145
+ {
146
+ "epoch": 5.89,
147
+ "learning_rate": 0.0002472018117765476,
148
+ "loss": 0.4499,
149
+ "step": 4000
150
+ },
151
+ {
152
+ "epoch": 5.89,
153
+ "eval_loss": 0.5555837154388428,
154
+ "eval_runtime": 545.7873,
155
+ "eval_samples_per_second": 13.965,
156
+ "eval_steps_per_second": 1.746,
157
+ "eval_wer": 0.5522640061396776,
158
+ "step": 4000
159
+ },
160
+ {
161
+ "epoch": 6.48,
162
+ "learning_rate": 0.0002411625566180171,
163
+ "loss": 0.4095,
164
+ "step": 4400
165
+ },
166
+ {
167
+ "epoch": 6.48,
168
+ "eval_loss": 0.5786208510398865,
169
+ "eval_runtime": 543.989,
170
+ "eval_samples_per_second": 14.011,
171
+ "eval_steps_per_second": 1.752,
172
+ "eval_wer": 0.5524942440521873,
173
+ "step": 4400
174
+ },
175
+ {
176
+ "epoch": 7.07,
177
+ "learning_rate": 0.00023512330145948666,
178
+ "loss": 0.4003,
179
+ "step": 4800
180
+ },
181
+ {
182
+ "epoch": 7.07,
183
+ "eval_loss": 0.5860427021980286,
184
+ "eval_runtime": 549.6883,
185
+ "eval_samples_per_second": 13.866,
186
+ "eval_steps_per_second": 1.734,
187
+ "eval_wer": 0.5389613711946789,
188
+ "step": 4800
189
+ },
190
+ {
191
+ "epoch": 7.66,
192
+ "learning_rate": 0.00022908404630095618,
193
+ "loss": 0.3653,
194
+ "step": 5200
195
+ },
196
+ {
197
+ "epoch": 7.66,
198
+ "eval_loss": 0.5734272003173828,
199
+ "eval_runtime": 542.4405,
200
+ "eval_samples_per_second": 14.051,
201
+ "eval_steps_per_second": 1.757,
202
+ "eval_wer": 0.5064466615502686,
203
+ "step": 5200
204
+ },
205
+ {
206
+ "epoch": 8.25,
207
+ "learning_rate": 0.00022304479114242576,
208
+ "loss": 0.3454,
209
+ "step": 5600
210
+ },
211
+ {
212
+ "epoch": 8.25,
213
+ "eval_loss": 0.5864331126213074,
214
+ "eval_runtime": 543.9245,
215
+ "eval_samples_per_second": 14.013,
216
+ "eval_steps_per_second": 1.752,
217
+ "eval_wer": 0.4945510360706063,
218
+ "step": 5600
219
+ },
220
+ {
221
+ "epoch": 8.84,
222
+ "learning_rate": 0.0002170055359838953,
223
+ "loss": 0.3223,
224
+ "step": 6000
225
+ },
226
+ {
227
+ "epoch": 8.84,
228
+ "eval_loss": 0.5884710550308228,
229
+ "eval_runtime": 548.5513,
230
+ "eval_samples_per_second": 13.895,
231
+ "eval_steps_per_second": 1.737,
232
+ "eval_wer": 0.500332565873625,
233
+ "step": 6000
234
+ },
235
+ {
236
+ "epoch": 9.43,
237
+ "learning_rate": 0.00021096628082536487,
238
+ "loss": 0.2897,
239
+ "step": 6400
240
+ },
241
+ {
242
+ "epoch": 9.43,
243
+ "eval_loss": 0.6017025113105774,
244
+ "eval_runtime": 545.222,
245
+ "eval_samples_per_second": 13.98,
246
+ "eval_steps_per_second": 1.748,
247
+ "eval_wer": 0.49053466359682785,
248
+ "step": 6400
249
+ },
250
+ {
251
+ "epoch": 10.01,
252
+ "learning_rate": 0.0002049270256668344,
253
+ "loss": 0.289,
254
+ "step": 6800
255
+ },
256
+ {
257
+ "epoch": 10.01,
258
+ "eval_loss": 0.63252192735672,
259
+ "eval_runtime": 547.5169,
260
+ "eval_samples_per_second": 13.921,
261
+ "eval_steps_per_second": 1.741,
262
+ "eval_wer": 0.4930928626247122,
263
+ "step": 6800
264
+ },
265
+ {
266
+ "epoch": 10.6,
267
+ "learning_rate": 0.00019888777050830396,
268
+ "loss": 0.2488,
269
+ "step": 7200
270
+ },
271
+ {
272
+ "epoch": 10.6,
273
+ "eval_loss": 0.6799584627151489,
274
+ "eval_runtime": 541.9978,
275
+ "eval_samples_per_second": 14.063,
276
+ "eval_steps_per_second": 1.758,
277
+ "eval_wer": 0.49833717063187516,
278
+ "step": 7200
279
+ },
280
+ {
281
+ "epoch": 11.19,
282
+ "learning_rate": 0.0001928485153497735,
283
+ "loss": 0.2361,
284
+ "step": 7600
285
+ },
286
+ {
287
+ "epoch": 11.19,
288
+ "eval_loss": 0.6552415490150452,
289
+ "eval_runtime": 544.0722,
290
+ "eval_samples_per_second": 14.009,
291
+ "eval_steps_per_second": 1.752,
292
+ "eval_wer": 0.5002046559222307,
293
+ "step": 7600
294
+ },
295
+ {
296
+ "epoch": 11.78,
297
+ "learning_rate": 0.00018682435832913938,
298
+ "loss": 0.2275,
299
+ "step": 8000
300
+ },
301
+ {
302
+ "epoch": 11.78,
303
+ "eval_loss": 0.6828446984291077,
304
+ "eval_runtime": 543.1333,
305
+ "eval_samples_per_second": 14.033,
306
+ "eval_steps_per_second": 1.755,
307
+ "eval_wer": 0.4898183678690202,
308
+ "step": 8000
309
+ },
310
+ {
311
+ "epoch": 12.37,
312
+ "learning_rate": 0.00018078510317060895,
313
+ "loss": 0.2109,
314
+ "step": 8400
315
+ },
316
+ {
317
+ "epoch": 12.37,
318
+ "eval_loss": 0.6952915787696838,
319
+ "eval_runtime": 542.929,
320
+ "eval_samples_per_second": 14.039,
321
+ "eval_steps_per_second": 1.755,
322
+ "eval_wer": 0.4861601432591456,
323
+ "step": 8400
324
+ },
325
+ {
326
+ "epoch": 12.96,
327
+ "learning_rate": 0.0001747458480120785,
328
+ "loss": 0.2061,
329
+ "step": 8800
330
+ },
331
+ {
332
+ "epoch": 12.96,
333
+ "eval_loss": 0.6886131167411804,
334
+ "eval_runtime": 544.2666,
335
+ "eval_samples_per_second": 14.004,
336
+ "eval_steps_per_second": 1.751,
337
+ "eval_wer": 0.4692504476848299,
338
+ "step": 8800
339
+ },
340
+ {
341
+ "epoch": 13.55,
342
+ "learning_rate": 0.00016870659285354804,
343
+ "loss": 0.1874,
344
+ "step": 9200
345
+ },
346
+ {
347
+ "epoch": 13.55,
348
+ "eval_loss": 0.7013294696807861,
349
+ "eval_runtime": 543.7236,
350
+ "eval_samples_per_second": 14.018,
351
+ "eval_steps_per_second": 1.753,
352
+ "eval_wer": 0.47367613200306985,
353
+ "step": 9200
354
+ },
355
+ {
356
+ "epoch": 14.14,
357
+ "learning_rate": 0.00016266733769501759,
358
+ "loss": 0.1824,
359
+ "step": 9600
360
+ },
361
+ {
362
+ "epoch": 14.14,
363
+ "eval_loss": 0.7199532985687256,
364
+ "eval_runtime": 545.3665,
365
+ "eval_samples_per_second": 13.976,
366
+ "eval_steps_per_second": 1.747,
367
+ "eval_wer": 0.4730621642363776,
368
+ "step": 9600
369
+ },
370
+ {
371
+ "epoch": 14.73,
372
+ "learning_rate": 0.00015662808253648716,
373
+ "loss": 0.1773,
374
+ "step": 10000
375
+ },
376
+ {
377
+ "epoch": 14.73,
378
+ "eval_loss": 0.6803578734397888,
379
+ "eval_runtime": 541.6325,
380
+ "eval_samples_per_second": 14.072,
381
+ "eval_steps_per_second": 1.759,
382
+ "eval_wer": 0.47045280122793554,
383
+ "step": 10000
384
+ },
385
+ {
386
+ "epoch": 15.32,
387
+ "learning_rate": 0.00015058882737795668,
388
+ "loss": 0.1663,
389
+ "step": 10400
390
+ },
391
+ {
392
+ "epoch": 15.32,
393
+ "eval_loss": 0.6929047703742981,
394
+ "eval_runtime": 543.7575,
395
+ "eval_samples_per_second": 14.017,
396
+ "eval_steps_per_second": 1.753,
397
+ "eval_wer": 0.4615246866206191,
398
+ "step": 10400
399
+ },
400
+ {
401
+ "epoch": 15.91,
402
+ "learning_rate": 0.00014454957221942625,
403
+ "loss": 0.1529,
404
+ "step": 10800
405
+ },
406
+ {
407
+ "epoch": 15.91,
408
+ "eval_loss": 0.7400447130203247,
409
+ "eval_runtime": 541.8998,
410
+ "eval_samples_per_second": 14.065,
411
+ "eval_steps_per_second": 1.759,
412
+ "eval_wer": 0.4675364543361474,
413
+ "step": 10800
414
+ },
415
+ {
416
+ "epoch": 16.49,
417
+ "learning_rate": 0.00013851031706089582,
418
+ "loss": 0.1406,
419
+ "step": 11200
420
+ },
421
+ {
422
+ "epoch": 16.49,
423
+ "eval_loss": 0.7907389998435974,
424
+ "eval_runtime": 543.7393,
425
+ "eval_samples_per_second": 14.018,
426
+ "eval_steps_per_second": 1.753,
427
+ "eval_wer": 0.46472243540547453,
428
+ "step": 11200
429
+ },
430
+ {
431
+ "epoch": 17.08,
432
+ "learning_rate": 0.0001324861600402617,
433
+ "loss": 0.1376,
434
+ "step": 11600
435
+ },
436
+ {
437
+ "epoch": 17.08,
438
+ "eval_loss": 0.8006933927536011,
439
+ "eval_runtime": 543.2472,
440
+ "eval_samples_per_second": 14.03,
441
+ "eval_steps_per_second": 1.754,
442
+ "eval_wer": 0.46689690457917626,
443
+ "step": 11600
444
+ },
445
+ {
446
+ "epoch": 17.67,
447
+ "learning_rate": 0.00012644690488173123,
448
+ "loss": 0.1273,
449
+ "step": 12000
450
+ },
451
+ {
452
+ "epoch": 17.67,
453
+ "eval_loss": 0.7544116973876953,
454
+ "eval_runtime": 544.2672,
455
+ "eval_samples_per_second": 14.004,
456
+ "eval_steps_per_second": 1.751,
457
+ "eval_wer": 0.46403172166794576,
458
+ "step": 12000
459
+ },
460
+ {
461
+ "epoch": 18.26,
462
+ "learning_rate": 0.00012040764972320079,
463
+ "loss": 0.1252,
464
+ "step": 12400
465
+ },
466
+ {
467
+ "epoch": 18.26,
468
+ "eval_loss": 0.7654944658279419,
469
+ "eval_runtime": 548.3671,
470
+ "eval_samples_per_second": 13.899,
471
+ "eval_steps_per_second": 1.738,
472
+ "eval_wer": 0.45349194167306217,
473
+ "step": 12400
474
+ },
475
+ {
476
+ "epoch": 18.85,
477
+ "learning_rate": 0.00011436839456467034,
478
+ "loss": 0.1226,
479
+ "step": 12800
480
+ },
481
+ {
482
+ "epoch": 18.85,
483
+ "eval_loss": 0.7361114621162415,
484
+ "eval_runtime": 546.1341,
485
+ "eval_samples_per_second": 13.956,
486
+ "eval_steps_per_second": 1.745,
487
+ "eval_wer": 0.4549245331286774,
488
+ "step": 12800
489
+ },
490
+ {
491
+ "epoch": 19.44,
492
+ "learning_rate": 0.0001083291394061399,
493
+ "loss": 0.1165,
494
+ "step": 13200
495
+ },
496
+ {
497
+ "epoch": 19.44,
498
+ "eval_loss": 0.8643974661827087,
499
+ "eval_runtime": 543.4889,
500
+ "eval_samples_per_second": 14.024,
501
+ "eval_steps_per_second": 1.753,
502
+ "eval_wer": 0.45622921463289845,
503
+ "step": 13200
504
+ },
505
+ {
506
+ "epoch": 20.03,
507
+ "learning_rate": 0.00010230498238550579,
508
+ "loss": 0.1119,
509
+ "step": 13600
510
+ },
511
+ {
512
+ "epoch": 20.03,
513
+ "eval_loss": 0.8597950339317322,
514
+ "eval_runtime": 545.2818,
515
+ "eval_samples_per_second": 13.978,
516
+ "eval_steps_per_second": 1.748,
517
+ "eval_wer": 0.4632642619595805,
518
+ "step": 13600
519
+ },
520
+ {
521
+ "epoch": 20.62,
522
+ "learning_rate": 9.626572722697534e-05,
523
+ "loss": 0.1077,
524
+ "step": 14000
525
+ },
526
+ {
527
+ "epoch": 20.62,
528
+ "eval_loss": 0.8093447089195251,
529
+ "eval_runtime": 541.9895,
530
+ "eval_samples_per_second": 14.063,
531
+ "eval_steps_per_second": 1.758,
532
+ "eval_wer": 0.453031465848043,
533
+ "step": 14000
534
+ },
535
+ {
536
+ "epoch": 21.21,
537
+ "learning_rate": 9.02264720684449e-05,
538
+ "loss": 0.102,
539
+ "step": 14400
540
+ },
541
+ {
542
+ "epoch": 21.21,
543
+ "eval_loss": 0.8589721918106079,
544
+ "eval_runtime": 548.1684,
545
+ "eval_samples_per_second": 13.904,
546
+ "eval_steps_per_second": 1.739,
547
+ "eval_wer": 0.4514965464313124,
548
+ "step": 14400
549
+ },
550
+ {
551
+ "epoch": 21.8,
552
+ "learning_rate": 8.418721690991444e-05,
553
+ "loss": 0.0939,
554
+ "step": 14800
555
+ },
556
+ {
557
+ "epoch": 21.8,
558
+ "eval_loss": 0.8175553679466248,
559
+ "eval_runtime": 541.1705,
560
+ "eval_samples_per_second": 14.084,
561
+ "eval_steps_per_second": 1.761,
562
+ "eval_wer": 0.4527500639549757,
563
+ "step": 14800
564
+ },
565
+ {
566
+ "epoch": 22.39,
567
+ "learning_rate": 7.814796175138399e-05,
568
+ "loss": 0.0898,
569
+ "step": 15200
570
+ },
571
+ {
572
+ "epoch": 22.39,
573
+ "eval_loss": 0.8843649625778198,
574
+ "eval_runtime": 546.2264,
575
+ "eval_samples_per_second": 13.954,
576
+ "eval_steps_per_second": 1.745,
577
+ "eval_wer": 0.44740342798669736,
578
+ "step": 15200
579
+ },
580
+ {
581
+ "epoch": 22.97,
582
+ "learning_rate": 7.210870659285354e-05,
583
+ "loss": 0.0903,
584
+ "step": 15600
585
+ },
586
+ {
587
+ "epoch": 22.97,
588
+ "eval_loss": 0.8875829577445984,
589
+ "eval_runtime": 540.326,
590
+ "eval_samples_per_second": 14.106,
591
+ "eval_steps_per_second": 1.764,
592
+ "eval_wer": 0.45231517012023537,
593
+ "step": 15600
594
+ },
595
+ {
596
+ "epoch": 23.56,
597
+ "learning_rate": 6.606945143432309e-05,
598
+ "loss": 0.0848,
599
+ "step": 16000
600
+ },
601
+ {
602
+ "epoch": 23.56,
603
+ "eval_loss": 0.9255176782608032,
604
+ "eval_runtime": 544.8055,
605
+ "eval_samples_per_second": 13.99,
606
+ "eval_steps_per_second": 1.749,
607
+ "eval_wer": 0.44814530570478384,
608
+ "step": 16000
609
+ },
610
+ {
611
+ "epoch": 24.15,
612
+ "learning_rate": 6.003019627579265e-05,
613
+ "loss": 0.0822,
614
+ "step": 16400
615
+ },
616
+ {
617
+ "epoch": 24.15,
618
+ "eval_loss": 0.9284627437591553,
619
+ "eval_runtime": 544.7798,
620
+ "eval_samples_per_second": 13.991,
621
+ "eval_steps_per_second": 1.749,
622
+ "eval_wer": 0.4471220260936301,
623
+ "step": 16400
624
+ },
625
+ {
626
+ "epoch": 24.74,
627
+ "learning_rate": 5.39909411172622e-05,
628
+ "loss": 0.0767,
629
+ "step": 16800
630
+ },
631
+ {
632
+ "epoch": 24.74,
633
+ "eval_loss": 0.9410629868507385,
634
+ "eval_runtime": 544.0954,
635
+ "eval_samples_per_second": 14.009,
636
+ "eval_steps_per_second": 1.752,
637
+ "eval_wer": 0.4394218470196981,
638
+ "step": 16800
639
+ },
640
+ {
641
+ "epoch": 25.33,
642
+ "learning_rate": 4.795168595873175e-05,
643
+ "loss": 0.0735,
644
+ "step": 17200
645
+ },
646
+ {
647
+ "epoch": 25.33,
648
+ "eval_loss": 0.9868486523628235,
649
+ "eval_runtime": 546.6128,
650
+ "eval_samples_per_second": 13.944,
651
+ "eval_steps_per_second": 1.743,
652
+ "eval_wer": 0.44195446405730365,
653
+ "step": 17200
654
+ },
655
+ {
656
+ "epoch": 25.92,
657
+ "learning_rate": 4.1927528938097633e-05,
658
+ "loss": 0.0728,
659
+ "step": 17600
660
+ },
661
+ {
662
+ "epoch": 25.92,
663
+ "eval_loss": 0.938178539276123,
664
+ "eval_runtime": 542.4257,
665
+ "eval_samples_per_second": 14.052,
666
+ "eval_steps_per_second": 1.757,
667
+ "eval_wer": 0.4406753645433615,
668
+ "step": 17600
669
+ },
670
+ {
671
+ "epoch": 26.51,
672
+ "learning_rate": 3.590337191746351e-05,
673
+ "loss": 0.0673,
674
+ "step": 18000
675
+ },
676
+ {
677
+ "epoch": 26.51,
678
+ "eval_loss": 0.9842382669448853,
679
+ "eval_runtime": 544.7728,
680
+ "eval_samples_per_second": 13.991,
681
+ "eval_steps_per_second": 1.749,
682
+ "eval_wer": 0.4360706062931696,
683
+ "step": 18000
684
+ },
685
+ {
686
+ "epoch": 27.1,
687
+ "learning_rate": 2.9864116758933062e-05,
688
+ "loss": 0.0683,
689
+ "step": 18400
690
+ },
691
+ {
692
+ "epoch": 27.1,
693
+ "eval_loss": 0.9531042575836182,
694
+ "eval_runtime": 545.0114,
695
+ "eval_samples_per_second": 13.985,
696
+ "eval_steps_per_second": 1.749,
697
+ "eval_wer": 0.4396520849322077,
698
+ "step": 18400
699
+ },
700
+ {
701
+ "epoch": 27.69,
702
+ "learning_rate": 2.3824861600402614e-05,
703
+ "loss": 0.0631,
704
+ "step": 18800
705
+ },
706
+ {
707
+ "epoch": 27.69,
708
+ "eval_loss": 0.9613842964172363,
709
+ "eval_runtime": 542.3788,
710
+ "eval_samples_per_second": 14.053,
711
+ "eval_steps_per_second": 1.757,
712
+ "eval_wer": 0.4402148887183423,
713
+ "step": 18800
714
+ },
715
+ {
716
+ "epoch": 28.28,
717
+ "learning_rate": 1.7785606441872167e-05,
718
+ "loss": 0.0625,
719
+ "step": 19200
720
+ },
721
+ {
722
+ "epoch": 28.28,
723
+ "eval_loss": 0.9772672057151794,
724
+ "eval_runtime": 545.7524,
725
+ "eval_samples_per_second": 13.966,
726
+ "eval_steps_per_second": 1.746,
727
+ "eval_wer": 0.4349194167306216,
728
+ "step": 19200
729
+ },
730
+ {
731
+ "epoch": 28.87,
732
+ "learning_rate": 1.174635128334172e-05,
733
+ "loss": 0.0599,
734
+ "step": 19600
735
+ },
736
+ {
737
+ "epoch": 28.87,
738
+ "eval_loss": 0.9910905361175537,
739
+ "eval_runtime": 543.4098,
740
+ "eval_samples_per_second": 14.026,
741
+ "eval_steps_per_second": 1.754,
742
+ "eval_wer": 0.4378613456126887,
743
+ "step": 19600
744
+ },
745
+ {
746
+ "epoch": 29.45,
747
+ "learning_rate": 5.707096124811273e-06,
748
+ "loss": 0.0576,
749
+ "step": 20000
750
+ },
751
+ {
752
+ "epoch": 29.45,
753
+ "eval_loss": 0.9930649995803833,
754
+ "eval_runtime": 544.5195,
755
+ "eval_samples_per_second": 13.998,
756
+ "eval_steps_per_second": 1.75,
757
+ "eval_wer": 0.435354310565362,
758
+ "step": 20000
759
+ }
760
+ ],
761
+ "max_steps": 20370,
762
+ "num_train_epochs": 30,
763
+ "total_flos": 8.26011084747878e+19,
764
+ "trial_name": null,
765
+ "trial_params": null
766
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1659288d093e6b509908574dc3e90f1d68e84cbcdba04b02203ccfa27b28440c
3
+ size 2863
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_": 1, "e": 2, "g": 3, "t": 4, "«": 5, "»": 6, "ء": 7, "آ": 8, "أ": 9, "ؤ": 10, "إ": 11, "ئ": 12, "ا": 13, "ب": 14, "ة": 15, "ت": 16, "ث": 17, "ج": 18, "ح": 19, "خ": 20, "د": 21, "ذ": 22, "ر": 23, "ز": 24, "س": 25, "ش": 26, "ص": 27, "ض": 28, "ط": 29, "ظ": 30, "ع": 31, "غ": 32, "ـ": 33, "ف": 34, "ق": 35, "ك": 36, "ل": 37, "م": 38, "ن": 39, "ه": 40, "و": 41, "ى": 42, "ي": 43, "چ": 44, "ڨ": 45, "ک": 46, "ھ": 47, "ی": 48, "ۖ": 49, "ۚ": 50, "—": 51, "☭": 52, "ﺃ": 53, "ﻻ": 54, "|": 0, "[UNK]": 55, "[PAD]": 56}