Commit
·
a57b9fd
1
Parent(s):
246a60f
Update README.md
Browse files
README.md
CHANGED
@@ -81,10 +81,6 @@ print("Reference:", test_dataset["sentence"][:2])
|
|
81 |
The model can be evaluated as follows on the Japanese test data of Common Voice.
|
82 |
|
83 |
```python
|
84 |
-
!pip install mecab-python3
|
85 |
-
!pip install unidic-lite
|
86 |
-
!python -m unidic download
|
87 |
-
|
88 |
import torch
|
89 |
import torchaudio
|
90 |
from datasets import load_dataset, load_metric
|
@@ -98,7 +94,7 @@ processor = Wav2Vec2Processor.from_pretrained("qqhann/w2v_hf_jsut_xlsr53")
|
|
98 |
model = Wav2Vec2ForCTC.from_pretrained("qqhann/w2v_hf_jsut_xlsr53")
|
99 |
model.to("cuda")
|
100 |
|
101 |
-
chars_to_ignore_regex = '[
|
102 |
# resampler = torchaudio.transforms.Resample(48_000, 16_000) # JSUT is already 16kHz
|
103 |
resampler = torchaudio.transforms.Resample(16_000, 16_000) # JSUT is already 16kHz
|
104 |
|
|
|
81 |
The model can be evaluated as follows on the Japanese test data of Common Voice.
|
82 |
|
83 |
```python
|
|
|
|
|
|
|
|
|
84 |
import torch
|
85 |
import torchaudio
|
86 |
from datasets import load_dataset, load_metric
|
|
|
94 |
model = Wav2Vec2ForCTC.from_pretrained("qqhann/w2v_hf_jsut_xlsr53")
|
95 |
model.to("cuda")
|
96 |
|
97 |
+
chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\;\\:\\"\\“]' # TODO: adapt this list to include all special characters you removed from the data
|
98 |
# resampler = torchaudio.transforms.Resample(48_000, 16_000) # JSUT is already 16kHz
|
99 |
resampler = torchaudio.transforms.Resample(16_000, 16_000) # JSUT is already 16kHz
|
100 |
|