---
datasets:
- mozilla-foundation/common_voice_16_1
base_model:
- openai/whisper-small
language:
- th
metrics:
- wer
---
## Translation
```python
from transformers import pipeline
import torch

MODEL_NAME = "Rookiezz/whisper-small-th"
lang = "th"

# Use the first GPU if available, otherwise fall back to CPU.
device = 0 if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

# Force Thai as the source language and "translate" as the task,
# so the model outputs an English translation of the speech.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    language=lang,
    task="translate",
)

result = pipe("your_audio_file_path")
print(result["text"])
```
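The pipeline also accepts in-memory audio instead of a file path. A minimal sketch, assuming a 16 kHz mono recording loaded with the `soundfile` package (the file name `example.wav` is a placeholder):

```python
import soundfile as sf

# Load a local recording; Whisper models expect 16 kHz mono audio.
audio, sampling_rate = sf.read("example.wav")

# The ASR pipeline accepts a dict with the raw waveform and its sampling rate.
result = pipe({"raw": audio, "sampling_rate": sampling_rate})
print(result["text"])
```

For Thai transcription rather than English translation, the same `get_decoder_prompt_ids` call can be made with `task="transcribe"`.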