import gradio as gr
from transformers import pipeline
import torch

# Thai fine-tuned Whisper checkpoint hosted on the Hugging Face Hub
MODEL_NAME = "Rookiezz/whisper-small-th"

# Use the first GPU if one is available; otherwise run on CPU
device = 0 if torch.cuda.is_available() else "cpu"

# Build the speech-recognition pipeline. Whisper decodes 30-second windows,
# so long recordings are split into 30-second chunks before decoding.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

# Force the decoder prompt so the model treats the audio as Thai speech and
# produces an English translation rather than a Thai transcription.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    language="th",
    task="translate",
)

def transcribe(audio_file_path):
    """Run the pipeline on an uploaded audio file and return the translated text."""
    result = pipe(audio_file_path)
    return result["text"]

interface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Thai-to-English Audio Translation",
    description="Upload an audio file in Thai, and this app will transcribe and translate it to English.",
    allow_flagging="never",
)

if __name__ == "__main__":
    # share=True exposes a temporary public Gradio link in addition to the local URL
    interface.launch(share=True)