Add more info
Browse files- README.md +78 -19
- predictions.csv +0 -0
- sample1608.flac +0 -0
- sample3860.flac +0 -0
README.md
CHANGED
@@ -9,10 +9,10 @@ tags:
|
|
9 |
- xlsr-fine-tuning-week
|
10 |
license: apache-2.0
|
11 |
widget:
|
12 |
-
- label: Malromur sample
|
13 |
-
src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-icelandic/resolve/main/
|
14 |
-
- label: Malromur sample
|
15 |
-
src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-icelandic/resolve/main/
|
16 |
model-index:
|
17 |
- name: XLSR Wav2Vec2 Icelandic by Mehrdad Farahani
|
18 |
results:
|
@@ -26,7 +26,7 @@ model-index:
|
|
26 |
metrics:
|
27 |
- name: Test WER
|
28 |
type: wer
|
29 |
-
value:
|
30 |
|
31 |
---
|
32 |
|
@@ -108,7 +108,7 @@ def predict(batch):
|
|
108 |
|
109 |
pred_ids = torch.argmax(logits, dim=-1)
|
110 |
|
111 |
-
batch["predicted"] = processor.batch_decode(pred_ids)
|
112 |
return batch
|
113 |
|
114 |
|
@@ -119,16 +119,16 @@ model = Wav2Vec2ForCTC.from_pretrained("m3hrdadfi/wav2vec2-large-xlsr-icelandic"
|
|
119 |
dataset = load_dataset("csv", data_files={"test": "./malromur_test.csv"})["test"]
|
120 |
dataset = dataset.map(
|
121 |
normalizer,
|
122 |
-
fn_kwargs={"
|
123 |
-
remove_columns=list(set(dataset.column_names) - set(['
|
124 |
)
|
125 |
|
126 |
dataset = dataset.map(speech_file_to_array_fn)
|
127 |
-
result = dataset.map(predict)
|
128 |
|
129 |
max_items = np.random.randint(0, len(result), 20).tolist()
|
130 |
for i in max_items:
|
131 |
-
reference, predicted = result["
|
132 |
print("reference:", reference)
|
133 |
print("predicted:", predicted)
|
134 |
print('---')
|
@@ -136,13 +136,72 @@ for i in max_items:
|
|
136 |
|
137 |
**Output:**
|
138 |
```text
|
139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
```
|
141 |
|
142 |
|
143 |
## Evaluation
|
144 |
|
145 |
-
The model can be evaluated as follows on the test data of
|
146 |
|
147 |
```python
|
148 |
import librosa
|
@@ -180,7 +239,7 @@ def predict(batch):
|
|
180 |
|
181 |
pred_ids = torch.argmax(logits, dim=-1)
|
182 |
|
183 |
-
batch["predicted"] = processor.batch_decode(pred_ids)
|
184 |
return batch
|
185 |
|
186 |
|
@@ -191,21 +250,21 @@ model = Wav2Vec2ForCTC.from_pretrained("m3hrdadfi/wav2vec2-large-xlsr-icelandic"
|
|
191 |
dataset = load_dataset("csv", data_files={"test": "./malromur_test.csv"})["test"]
|
192 |
dataset = dataset.map(
|
193 |
normalizer,
|
194 |
-
fn_kwargs={"
|
195 |
-
remove_columns=list(set(dataset.column_names) - set(['
|
196 |
)
|
197 |
|
198 |
dataset = dataset.map(speech_file_to_array_fn)
|
199 |
-
result = dataset.map(predict)
|
200 |
|
201 |
wer = load_metric("wer")
|
202 |
|
203 |
-
print("WER: {:.2f}".format(100 * wer.compute(predictions=result["predicted"], references=result["
|
204 |
```
|
205 |
-
|
206 |
|
207 |
**Test Result**:
|
208 |
-
- WER:
|
209 |
|
210 |
|
211 |
## Training & Report
|
|
|
9 |
- xlsr-fine-tuning-week
|
10 |
license: apache-2.0
|
11 |
widget:
|
12 |
+
- label: Malromur sample 1608
|
13 |
+
src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-icelandic/resolve/main/sample1608.flac
|
14 |
+
- label: Malromur sample 3860
|
15 |
+
src: https://huggingface.co/m3hrdadfi/wav2vec2-large-xlsr-icelandic/resolve/main/sample3860.flac
|
16 |
model-index:
|
17 |
- name: XLSR Wav2Vec2 Icelandic by Mehrdad Farahani
|
18 |
results:
|
|
|
26 |
metrics:
|
27 |
- name: Test WER
|
28 |
type: wer
|
29 |
+
value: 10.74
|
30 |
|
31 |
---
|
32 |
|
|
|
108 |
|
109 |
pred_ids = torch.argmax(logits, dim=-1)
|
110 |
|
111 |
+
batch["predicted"] = processor.batch_decode(pred_ids)
|
112 |
return batch
|
113 |
|
114 |
|
|
|
119 |
dataset = load_dataset("csv", data_files={"test": "./malromur_test.csv"})["test"]
|
120 |
dataset = dataset.map(
|
121 |
normalizer,
|
122 |
+
fn_kwargs={"do_lastspace_removing": True, "text_key_name": "cleaned_sentence"},
|
123 |
+
remove_columns=list(set(dataset.column_names) - set(['cleaned_sentence', 'path']))
|
124 |
)
|
125 |
|
126 |
dataset = dataset.map(speech_file_to_array_fn)
|
127 |
+
result = dataset.map(predict, batched=True, batch_size=8)
|
128 |
|
129 |
max_items = np.random.randint(0, len(result), 20).tolist()
|
130 |
for i in max_items:
|
131 |
+
reference, predicted = result["cleaned_sentence"][i], result["predicted"][i]
|
132 |
print("reference:", reference)
|
133 |
print("predicted:", predicted)
|
134 |
print('---')
|
|
|
136 |
|
137 |
**Output:**
|
138 |
```text
|
139 |
+
reference: lögregla rakti sporin í snjónum
|
140 |
+
predicted: lögregla rakti sporinn í snjónum
|
141 |
+
---
|
142 |
+
reference: vaðlatúni
|
143 |
+
predicted: vaðlatúni
|
144 |
+
---
|
145 |
+
reference: mykjunesi
|
146 |
+
predicted: mikjunesi
|
147 |
+
---
|
148 |
+
reference: miðey
|
149 |
+
predicted: miðey
|
150 |
+
---
|
151 |
+
reference: tveir mótmæla við stjórnarráðsbygginguna
|
152 |
+
predicted: tveir mótmæla við stjórnarráðsbegginguna
|
153 |
+
---
|
154 |
+
reference: furðustrandir mest selda bók ársins
|
155 |
+
predicted: furðustrandir mest seldabók ársins
|
156 |
+
---
|
157 |
+
reference: flekar brenndir í kvöld
|
158 |
+
predicted: flekar brenndir í kvöld
|
159 |
+
---
|
160 |
+
reference: ástæðan er sögð eldgosið í grímsvötnum
|
161 |
+
predicted: ástæðan er sögð eldgosið í grímsvötnum
|
162 |
+
---
|
163 |
+
reference: birtingur
|
164 |
+
predicted: birtingur
|
165 |
+
---
|
166 |
+
reference: tvöþúsund og átján
|
167 |
+
predicted: tvöþúsund og átján
|
168 |
+
---
|
169 |
+
reference: einfríður
|
170 |
+
predicted: einfríður
|
171 |
+
---
|
172 |
+
reference: dalhúsum
|
173 |
+
predicted: dalhúsum
|
174 |
+
---
|
175 |
+
reference: sex stútar á ferð
|
176 |
+
predicted: sex stútar á ferð
|
177 |
+
---
|
178 |
+
reference: eyjamenn áfram í toppbaráttu
|
179 |
+
predicted: eyjamenn áfram í toppbaráttu
|
180 |
+
---
|
181 |
+
reference: þetta októberkvöld sýndi sitt rétta andlit með hráslagakulda frá vatninu
|
182 |
+
predicted: þetta októberkvöld sýnsint réttla andlit með hráslagakulda frá vatninu
|
183 |
+
---
|
184 |
+
reference: jes
|
185 |
+
predicted: js
|
186 |
+
---
|
187 |
+
reference: hersveitirnar benda hvor á aðra
|
188 |
+
predicted: hersveitirnar benda hvor á aðra
|
189 |
+
---
|
190 |
+
reference: þetta er hráskinnsleikur stórvelda eins og hver maður vissi
|
191 |
+
predicted: þetta er hráskinnsleikur stórvelda eins og hver maður vissi
|
192 |
+
---
|
193 |
+
reference: umferð efstu deildar hófst
|
194 |
+
predicted: umferð efstu deildar hófst
|
195 |
+
---
|
196 |
+
reference: freisting is
|
197 |
+
predicted: freisting is
|
198 |
+
---
|
199 |
```
|
200 |
|
201 |
|
202 |
## Evaluation
|
203 |
|
204 |
+
The model can be evaluated on the Malromur test data as follows.
|
205 |
|
206 |
```python
|
207 |
import librosa
|
|
|
239 |
|
240 |
pred_ids = torch.argmax(logits, dim=-1)
|
241 |
|
242 |
+
batch["predicted"] = processor.batch_decode(pred_ids)
|
243 |
return batch
|
244 |
|
245 |
|
|
|
250 |
dataset = load_dataset("csv", data_files={"test": "./malromur_test.csv"})["test"]
|
251 |
dataset = dataset.map(
|
252 |
normalizer,
|
253 |
+
fn_kwargs={"do_lastspace_removing": True, "text_key_name": "cleaned_sentence"},
|
254 |
+
remove_columns=list(set(dataset.column_names) - set(['cleaned_sentence', 'path']))
|
255 |
)
|
256 |
|
257 |
dataset = dataset.map(speech_file_to_array_fn)
|
258 |
+
result = dataset.map(predict, batched=True, batch_size=8)
|
259 |
|
260 |
wer = load_metric("wer")
|
261 |
|
262 |
+
print("WER: {:.2f}".format(100 * wer.compute(predictions=result["predicted"], references=result["cleaned_sentence"])))
|
263 |
```
|
264 |
+
|
265 |
|
266 |
**Test Result**:
|
267 |
+
- WER: 10.74%
|
268 |
|
269 |
|
270 |
## Training & Report
|
predictions.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
sample1608.flac
ADDED
Binary file (109 kB). View file
|
|
sample3860.flac
ADDED
Binary file (75.7 kB). View file
|
|