# Compare ASR checkpoints (Whisper tiny, Wav2Vec2, Distil-Whisper) on 100
# streamed LibriSpeech clean-validation samples, scored by word error rate.
from datasets import load_dataset
from transformers import pipeline
import evaluate
import numpy as np
from tqdm import tqdm

# Stream the LibriSpeech "clean" validation split and evaluate only the first
# 100 samples; streaming avoids downloading the whole split up front.
ds = load_dataset("openslr/librispeech_asr", "clean", split="validation", streaming=True)
ds = ds.take(100)

# Display label -> Hugging Face Hub checkpoint id for each model under test.
# Labels corrected to match the checkpoints actually loaded: the Wav2Vec2
# checkpoint is the *base* model (the old label said "large"), and the
# Distil-Whisper org spells its name "distil", not "distill".
model_name = {
    "whisper-tiny": "openai/whisper-tiny.en",
    "wav2vec2-base-960h": "facebook/wav2vec2-base-960h",
    "distil-whisper-small": "distil-whisper/distil-small.en",
}
def evaluate_model(ds, pipe, wer_metric):
    """Transcribe every sample in *ds* with *pipe* and score it with *wer_metric*.

    Args:
        ds: Iterable of samples; each sample is a mapping with an "audio" entry
            (itself a mapping holding the waveform under "array") and a "text"
            reference transcript.
        pipe: ASR callable; ``pipe(waveform)`` returns a dict with a "text" key.
        wer_metric: Object exposing ``compute(predictions=..., references=...)``
            and returning a per-sample WER float.

    Returns:
        Tuple ``(wer_scores, wer_results)``: the list of per-sample WER floats,
        and a parallel list of dicts with index, transcription, reference, and
        wer for later inspection.
    """
    wer_scores = []
    wer_results = []
    # Translation table built once, outside the loop: deletes basic punctuation
    # in a single C-level pass instead of four chained .replace() calls.
    strip_punct = str.maketrans("", "", ",.!?")
    # BUG FIX: the original passed total=len(list(ds)), which fully iterates
    # the (streaming) dataset a first time just to count it — a whole wasted
    # pass over the audio. tqdm copes fine with an unknown total.
    for idx, sample in enumerate(tqdm(ds, desc="Evaluating")):
        transcription = pipe(sample["audio"]["array"])["text"]
        # Keep only letters and spaces for evaluation; references are
        # presumably unpunctuated upper-case text (hence the .upper() below).
        prediction = transcription.translate(strip_punct).upper()
        reference = sample["text"].upper()
        wer = wer_metric.compute(predictions=[prediction], references=[reference])
        wer_scores.append(wer)
        wer_results.append({
            "index": idx,
            "transcription": prediction,
            "reference": reference,
            "wer": wer,
        })
    return wer_scores, wer_results
# Load the WER (word error rate) metric once and share it across all models.
wer_metric = evaluate.load("wer")

results = {}            # model label -> mean WER over the evaluated samples
model_wer_results = {}  # model label -> per-sample result dicts

# Evaluate each candidate model on the same 100-sample stream. Re-iterating
# the streaming dataset restarts it from the beginning for every model, so
# all models see identical samples.
for model in model_name:
    pipe = pipeline("automatic-speech-recognition", model=model_name[model])
    wer_scores, wer_results = evaluate_model(ds, pipe, wer_metric)
    results[model] = np.mean(wer_scores)
    model_wer_results[model] = wer_results

# Summarize one line per model.
for model in results:
    print(f"Model: {model}, WER: {results[model]}")