import gradio as gr
import torch
from transformers import pipeline

# Model setup
MODEL_NAME = "Rookiezz/whisper-small-th"
device = 0 if torch.cuda.is_available() else "cpu"

pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,  # Whisper processes audio in 30-second windows
    device=device,
)

# Force the decoder to translate Thai speech into English text
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    language="th", task="translate"
)


# Process an uploaded audio file and return the English translation
def transcribe(audio_file_path):
    # Pass the uploaded audio file to the model pipeline
    result = pipe(audio_file_path)
    return result["text"]


# Gradio interface
interface = gr.Interface(
    fn=transcribe,  # Function that processes the input
    inputs=gr.Audio(type="filepath"),  # File upload for audio input
    outputs="text",  # Display the translated text
    title="Thai-to-English Audio Translation",
    description="Upload an audio file in Thai, and this app will transcribe and translate it to English.",
    allow_flagging="never",  # Disable flagging for simplicity
)

# Launch the Gradio app
if __name__ == "__main__":
    interface.launch(share=True)