patrickvonplaten
committed on
Commit
•
29d8ab9
1
Parent(s):
11d2712
Update README.md
Browse files
README.md
CHANGED
@@ -69,25 +69,18 @@ with `pip install torchaudio sentencepiece`.
|
|
69 |
import torch
|
70 |
from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
|
71 |
from datasets import load_dataset
|
72 |
-
import soundfile as sf
|
73 |
|
74 |
model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr")
|
75 |
processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr")
|
76 |
|
77 |
-
def map_to_array(batch):
|
78 |
-
speech, _ = sf.read(batch["file"])
|
79 |
-
batch["speech"] = speech
|
80 |
-
return batch
|
81 |
-
|
82 |
ds = load_dataset(
|
83 |
"patrickvonplaten/librispeech_asr_dummy",
|
84 |
"clean",
|
85 |
split="validation"
|
86 |
)
|
87 |
-
ds = ds.map(map_to_array)
|
88 |
|
89 |
input_features = processor(
|
90 |
-
ds["speech"][0],
|
91 |
sampling_rate=16_000,
|
92 |
return_tensors="pt"
|
93 |
).input_features # Batch size 1
|
@@ -104,7 +97,6 @@ The following script shows how to evaluate this model on the [LibriSpeech](https
|
|
104 |
```python
|
105 |
from datasets import load_dataset, load_metric
|
106 |
from transformers import Speech2TextForConditionalGeneration, Speech2TextProcessor
|
107 |
-
import soundfile as sf
|
108 |
|
109 |
librispeech_eval = load_dataset("librispeech_asr", "clean", split="test") # change to "other" for other test dataset
|
110 |
wer = load_metric("wer")
|
@@ -112,15 +104,10 @@ wer = load_metric("wer")
|
|
112 |
model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr").to("cuda")
|
113 |
processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr", do_upper_case=True)
|
114 |
|
115 |
-
def map_to_array(batch):
|
116 |
-
speech, _ = sf.read(batch["file"])
|
117 |
-
batch["speech"] = speech
|
118 |
-
return batch
|
119 |
-
|
120 |
librispeech_eval = librispeech_eval.map(map_to_array)
|
121 |
|
122 |
def map_to_pred(batch):
|
123 |
-
features = processor(batch["speech"], sampling_rate=16000, padding=True, return_tensors="pt")
|
124 |
input_features = features.input_features.to("cuda")
|
125 |
attention_mask = features.attention_mask.to("cuda")
|
126 |
|
|
|
69 |
import torch
|
70 |
from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
|
71 |
from datasets import load_dataset
|
|
|
72 |
|
73 |
model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr")
|
74 |
processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr")
|
75 |
|
|
|
|
|
|
|
|
|
|
|
76 |
ds = load_dataset(
|
77 |
"patrickvonplaten/librispeech_asr_dummy",
|
78 |
"clean",
|
79 |
split="validation"
|
80 |
)
|
|
|
81 |
|
82 |
input_features = processor(
|
83 |
+
ds["audio"]["array"][0],
|
84 |
sampling_rate=16_000,
|
85 |
return_tensors="pt"
|
86 |
).input_features # Batch size 1
|
|
|
97 |
```python
|
98 |
from datasets import load_dataset, load_metric
|
99 |
from transformers import Speech2TextForConditionalGeneration, Speech2TextProcessor
|
|
|
100 |
|
101 |
librispeech_eval = load_dataset("librispeech_asr", "clean", split="test") # change to "other" for other test dataset
|
102 |
wer = load_metric("wer")
|
|
|
104 |
model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr").to("cuda")
|
105 |
processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr", do_upper_case=True)
|
106 |
|
|
|
|
|
|
|
|
|
|
|
107 |
librispeech_eval = librispeech_eval.map(map_to_array)
|
108 |
|
109 |
def map_to_pred(batch):
|
110 |
+
features = processor(batch["audio"]["array"], sampling_rate=16000, padding=True, return_tensors="pt")
|
111 |
input_features = features.input_features.to("cuda")
|
112 |
attention_mask = features.attention_mask.to("cuda")
|
113 |
|