ASR_ID2223 / app.py
irena's picture
Update app.py
b28a0c9
import gradio as gr
from transformers import pipeline
from pytube import YouTube
# Speech-recognition pipeline shared by every tab; built once at import time
# so individual requests do not reload the model.
_MODEL_ID = "irena/whisper-small-sv-SE"
pipe = pipeline(model=_MODEL_ID)
def transcribe_video(url):
    """Transcribe the audio track of a YouTube video.

    Args:
        url: Address of the YouTube video. ``None``, empty, or
            whitespace-only values are treated as "nothing to transcribe".

    Returns:
        The text produced by the module-level ``pipe``, or ``""`` when no
        URL was supplied.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    import tempfile

    if not url or not url.strip():
        return ""

    # ``first()`` yields None for an empty query instead of raising IndexError,
    # and avoids the deprecated ``StreamQuery.all()`` call.
    stream = YouTube(url.strip()).streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError(f"No audio-only stream available for {url!r}")

    # Download into a throwaway directory so files do not pile up in the
    # working directory or collide between requests for same-titled videos.
    # Transcription must happen before the directory is removed.
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = stream.download(output_path=workdir)
        return pipe(audio_path)["text"]
def transcribe_audio(audio):
    """Transcribe a microphone recording.

    Args:
        audio: Path to the recorded audio file, or ``None``/empty when the
            form was submitted without a recording.

    Returns:
        The text produced by the module-level ``pipe``, or ``""`` when there
        is no recording to transcribe.
    """
    # Gradio hands over None when nothing was recorded; the pipeline cannot
    # process that, so short-circuit instead of surfacing an error.
    if not audio:
        return ""
    return pipe(audio)["text"]
def transcribe_file(audio):
    """Transcribe an uploaded audio file.

    Args:
        audio: Path to the uploaded audio file, or ``None``/empty when the
            form was submitted without an upload.

    Returns:
        The text produced by the module-level ``pipe``, or ``""`` when no
        file was provided.
    """
    # The upload field may be left empty, in which case there is nothing to
    # feed to the pipeline.
    if not audio:
        return ""
    return pipe(audio)["text"]
# Tab 1: record from the microphone and transcribe the recording.
audio = gr.Interface(
    title="Whisper Small Swedish",
    description=(
        "Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model."
    ),
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath", source="microphone"),
    outputs="text",
)
# Tab 2: upload a local audio file and transcribe it.
# Uses ``gr.Audio`` like the microphone tab rather than the deprecated
# ``gr.inputs`` namespace and its deprecated ``optional`` flag; an empty
# upload still reaches the callback as None.
file = gr.Interface(
    fn=transcribe_file,
    inputs=[
        gr.Audio(source="upload", type="filepath"),
    ],
    outputs="text",
    title="Whisper Small Swedish",
    description="Transcribe swedish audios",
)
# Tab 3: paste a YouTube link and transcribe the video's audio.
video = gr.Interface(
    title="Whisper Small Swedish",
    description="Transcribe swedish videos from YouTube",
    fn=transcribe_video,
    inputs=gr.Textbox(label="Enter a YouTube URL:"),
    outputs="text",
)
# Combine the three interfaces into one tabbed app; the labels are listed in
# the same order as the interfaces they name.
demo = gr.TabbedInterface(
    [
        audio,
        file,
        video,
    ],
    [
        "transcribe from microphone",
        "transcribe from local audios",
        "transcribe from youtube url",
    ],
)

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()