# Hugging Face Spaces page status captured with this file: "Runtime error".
import os
import subprocess

import gradio as gr
from pytube import YouTube
from transformers import pipeline
# Speech-recognition pipeline shared by both transcription entry points below.
# It is built once at import time so the checkpoint is not reloaded per request.
_WHISPER_CHECKPOINT = "irena/whisper-small-sv-SE"
pipe = pipeline(model=_WHISPER_CHECKPOINT)
def main_generator(youtube_id: str):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        youtube_id: A ``https://www.youtube.com/watch?v=<id>`` URL. Input that
            does not contain that prefix, or whose id contains unexpected
            characters, falls back to a built-in default video id.

    Returns:
        The ``"text"`` entry of the result produced by the module-level
        ``pipe`` pipeline for the downloaded audio file.

    Raises:
        subprocess.CalledProcessError: If ``youtube-dl`` exits with a
            non-zero status (e.g. the video could not be downloaded).
    """
    url_prefix = "https://www.youtube.com/watch?v="
    default_video_id = "xOZM-1p-jAk"

    pieces = youtube_id.split(url_prefix)
    video_id = pieces[1] if len(pieces) > 1 else default_video_id

    # Drop trailing query parameters such as "&t=10s" so that only the id is
    # used for the download URL and the output file name.
    video_id = video_id.split("&", 1)[0]

    # The id comes straight from a public text box: accept only ASCII
    # letters, digits, "-" and "_", otherwise use the default video.
    id_is_clean = bool(video_id) and all(
        ch.isascii() and (ch.isalnum() or ch in "-_") for ch in video_id
    )
    if not id_is_clean:
        video_id = default_video_id

    output_file = f"test_audio_youtube_{video_id}.m4a"

    # Argument list instead of a shell string: user input is never parsed by
    # a shell. The full URL is passed so an id starting with "-" cannot be
    # mistaken for a command-line option.
    subprocess.run(
        [
            "youtube-dl",
            "-o", output_file,
            "--extract-audio",
            "--restrict-filenames",
            "-f", "bestaudio[ext=m4a]",
            f"{url_prefix}{video_id}",
        ],
        check=True,
    )

    # The original called an undefined ``model_whisper``; the only model this
    # file loads is the module-level ``pipe``.
    result = pipe(output_file)
    return result["text"]
def transcribe(audio):
    """Run the module-level ``pipe`` on ``audio`` and return its ``"text"`` entry."""
    recognized = pipe(audio)
    return recognized["text"]
# Container that hosts both tabs of the app.
demo = gr.Blocks()

# Tab 1: transcribe audio recorded from the microphone.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    title="Whisper Small Swedish-Microphone",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model. An audio for recognize.",
)

# Tab 2: transcribe the audio track of a YouTube video given its URL.
inputs = [gr.Textbox(lines=1, placeholder="Link of youtube video here...", label="Input")]
# NOTE(review): main_generator returns a plain string; confirm that
# HighlightedText (rather than a text output) is the intended component.
outputs = gr.HighlightedText()
title = "Transcription of Swedish videos"
description = "This demo uses small Whisper to transcribe what is spoken in a swedish video"
examples = ['https://www.youtube.com/watch?v=6eWhV7xYH-Q']

io = gr.Interface(
    fn=main_generator,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    examples=examples,
    css=""".gr-button-primary { background: -webkit-linear-gradient(
90deg, #355764 0%, #55a8a1 100% ) !important; background: #355764;
background: linear-gradient(
90deg, #355764 0%, #55a8a1 100% ) !important;
background: -moz-linear-gradient( 90deg, #355764 0%, #55a8a1 100% ) !important;
background: -webkit-linear-gradient(
90deg, #355764 0%, #55a8a1 100% ) !important;
color:white !important}""",
)

with demo:
    # The original passed an undefined name ``yt`` here, which raises
    # NameError at startup; the YouTube interface is bound to ``io`` above.
    gr.TabbedInterface([iface, io], ["Transcribe Audio", "Transcribe YouTube"])

demo.launch(enable_queue=True)