# app.py -- uploaded by Rookiezz (commit f0eec04, verified)
import gradio as gr
from transformers import pipeline
import torch
# Model setup
MODEL_NAME = "Rookiezz/whisper-small-th"

# Whisper models operate on 30-second audio windows, so long recordings must
# be split into chunks of at most 30 seconds. A larger value makes each chunk
# exceed the model's input window, and the excess audio is not transcribed.
CHUNK_LENGTH_S = 30

# Use the first CUDA device when one is available, otherwise run on the CPU.
device = 0 if torch.cuda.is_available() else "cpu"

# Speech-recognition pipeline shared by every request handled by the app.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=CHUNK_LENGTH_S,
    device=device,
)

# Configure forced_decoder_ids for translation: the decoder prompt declares
# the source language as Thai ('th') and selects the "translate" task.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    language='th',
    task="translate",
)
# Function to process uploaded audio and return transcription
def transcribe(audio_file_path):
    """Run the speech pipeline on an uploaded audio file and return its text.

    Args:
        audio_file_path: Path to the audio file to process. May be ``None``
            or empty when the form is submitted without any audio.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string
        when no audio was provided.
    """
    # Nothing was uploaded: return early rather than handing a missing
    # input to the pipeline, which has nothing to decode.
    if not audio_file_path:
        return ""

    # Pass the uploaded audio file to the model pipeline
    result = pipe(audio_file_path)
    return result["text"]
# Gradio UI: one audio input wired to `transcribe`, rendered as plain text.
interface = gr.Interface(
    title="Thai-to-English Audio Translation",
    description="Upload an audio file in Thai, and this app will transcribe and translate it to English.",
    fn=transcribe,  # handler invoked with the submitted audio
    inputs=gr.Audio(type="filepath"),  # audio is handed to the handler as a file path
    outputs="text",  # show the returned string as-is
    allow_flagging="never",  # no flagging controls in the UI
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch(share=True)