import gradio as gr
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor, WhisperTokenizer
import librosa

model_name = "shReYas0363/whisper-tiny-fine-tuned"
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny")
tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-tiny")
model = WhisperForConditionalGeneration.from_pretrained(model_name)

def transcribe(audio_file_path):
    # Load the audio and resample to 16 kHz, the rate Whisper expects
    audio, sr = librosa.load(audio_file_path, sr=16000)
    # Convert the waveform into log-Mel input features
    input_features = processor(audio, return_tensors="pt", sampling_rate=16000).input_features
    # Generate token IDs without tracking gradients
    with torch.no_grad():
        predicted_ids = model.generate(input_features)
    # Decode the predicted token IDs back into text
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return transcription

interface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs=gr.Textbox(label="Transcription"),
    title="Whisper Tiny ASR",
    description="Fine-tuned on AI4Bharat's svarah dataset",
)

interface.launch()