Spaces:

khalidey
/

ID2223-Lab2-Whisper

Runtime error

File size: 2,598 Bytes

abaf86c
 
20baefb
9d22070
abaf86c
7069d5c
9417b92
abaf86c
 
7069d5c
f49454a
b23d0f5
7069d5c
20baefb
 
 
 
 
 
 
 
 
 
 
 
cdb9ddf
20baefb
5eb56a0
 
20baefb
b152137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20baefb
 
 
 
9074304
20baefb
b152137
 
 
 
 
 
 
 
 
 
20baefb
 
 
 
 
b152137

from transformers import pipeline
import gradio as gr
from pytube import YouTube
from datasets import Dataset, Audio

# Speech pipeline: called with an audio input below, and the "text" field of
# its result is used as the transcription. No task is passed explicitly, so
# it is presumably inferred from the model repository -- TODO confirm.
pipe1 = pipeline(model="khalidey/ID2223_Lab2_Whisper_SV")  # change to "your-username/the-name-you-picked"
# Text-generation pipeline used to produce a continuation of the transcription.
pipe2 = pipeline('text-generation', model='birgermoell/swedish-gpt')

def transcribe(audio):
    """Transcribe an audio file and generate a text continuation of it.

    Args:
        audio: Path of the audio file to transcribe, or ``None`` when no
            audio was supplied.

    Returns:
        A ``(text, generated_text)`` tuple: the recognized speech and the
        text produced by the text-generation pipeline when prompted with it.
        Both are empty strings when ``audio`` is ``None``.
    """
    # Presumably the Gradio audio inputs hand over None when the button is
    # pressed without an upload/recording; return empty outputs instead of
    # letting the speech pipeline fail on a missing file.
    if audio is None:
        return "", ""

    text = pipe1(audio)["text"]
    # Only the first candidate is ever shown, so request a single sequence
    # rather than generating two and discarding one.
    # NOTE(review): per the transformers docs max_length counts the prompt
    # tokens as well, so long transcriptions leave little room for new text;
    # consider max_new_tokens if that becomes a problem.
    candidates = pipe2(text, max_length=50, num_return_sequences=1)
    generated_text = candidates[0]['generated_text']
    return text, generated_text

def youtube_link(url):
    """Download the audio track of a YouTube video and return its file path.

    Args:
        url: URL of the YouTube video.

    Returns:
        The path of the downloaded audio-only mp4 file, as returned by the
        stream's ``download()`` call.

    Raises:
        ValueError: If the video offers no audio-only mp4 stream.
    """
    streams = YouTube(url).streams.filter(only_audio=True, file_extension='mp4')
    stream = streams.first()
    # first() gives None when the filter matches nothing; fail with a clear
    # message instead of an AttributeError on None.download().
    if stream is None:
        raise ValueError(f"No audio-only mp4 stream found for {url!r}")
    return stream.download()

def youtube_transcribe(url):
    """Transcribe the speech of a YouTube video.

    Args:
        url: URL of the YouTube video.

    Returns:
        The recognized text of the video's audio track.
    """
    path = youtube_link(url)

    # Hand the file path straight to the speech pipeline, the same way
    # transcribe() does with Gradio's "filepath" audio. The earlier approach
    # wrapped the path with list(path), which splits the string into single
    # characters, and then indexed the batched (list) result with ["text"].
    # Per the transformers docs the pipeline decodes a filename itself and
    # resamples it to the rate the model expects.
    return pipe1(path)["text"]
    
# UI layout: one tab per input method (file upload, microphone, YouTube URL),
# each with its own submit button and output textbox(es).
with gr.Blocks() as demo:
    gr.Markdown("Whisper Small Swedish + Swedish GPT")
    gr.Markdown("Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model & text generation with Swedish GPT.")

    # TabItem blocks belong inside a Tabs container; declare it explicitly
    # rather than relying on Gradio to supply the missing parent.
    with gr.Tabs():
        with gr.TabItem("Upload from disk"):
            upload_file = gr.Audio(source="upload", type="filepath", label="Upload from disk")
            upload_button = gr.Button("Submit for recognition")
            upload_outputs = [
                gr.Textbox(label="Recognized speech from uploaded file"),
                gr.Textbox(label="Swedish-gpt generated speech from uploaded file"),
            ]
        with gr.TabItem("Record from microphone"):
            record_file = gr.Audio(source="microphone", type="filepath", label="Record from microphone")
            record_button = gr.Button("Submit for recognition")
            record_outputs = [
                gr.Textbox(label="Recognized speech from recordings"),
                gr.Textbox(label="Swedish-gpt generated speech from recordings"),
            ]
        with gr.TabItem("Transcribe from Youtube URL"):
            url = gr.Text(max_lines=1, label="Transcribe from YouTube URL")
            youtube_button = gr.Button("Submit for recognition")
            youtube_outputs = [
                gr.Textbox(label="Recognized speech from URL"),
            ]

    # Both audio tabs share transcribe(), which fills two textboxes
    # (transcription + generated text); the YouTube tab only shows the
    # transcription.
    upload_button.click(
        fn=transcribe,
        inputs=upload_file,
        outputs=upload_outputs,
    )
    record_button.click(
        fn=transcribe,
        inputs=record_file,
        outputs=record_outputs,
    )
    youtube_button.click(
        fn=youtube_transcribe,
        inputs=url,
        outputs=youtube_outputs,
    )

demo.launch()