# Spaces status at capture time: Runtime error
from transformers import pipeline | |
import gradio as gr | |
from pytube import YouTube | |
from datasets import Dataset, Audio | |
import os | |
from moviepy.editor import AudioFileClip | |
# Speech-recognition pipeline backed by the fine-tuned Whisper checkpoint.
# Replace the model id with "your-username/the-name-you-picked" to use your own.
pipe1 = pipeline(
    model="khalidey/ID2223_Lab2_Whisper_SV",
)

# Text-generation pipeline used to continue the transcribed text.
pipe2 = pipeline(
    "text-generation",
    model="birgermoell/swedish-gpt",
)
def transcribe(audio):
    """Transcribe an audio input and generate a text continuation of it.

    Args:
        audio: Audio input forwarded unchanged to ``pipe1``; the UI passes a
            file path (the ``gr.Audio`` components use ``type="filepath"``).

    Returns:
        A ``(text, generated_text)`` tuple: the transcription produced by
        ``pipe1`` and the continuation produced by ``pipe2``.
    """
    text = pipe1(audio)["text"]
    # Only one generated sequence is ever used, so request exactly one
    # instead of generating two and discarding the second.
    # NOTE(review): max_length=50 presumably bounds prompt + generated tokens
    # together - confirm long transcriptions still leave room to generate.
    candidates = pipe2(text, max_length=50, num_return_sequences=1)
    generated_text = candidates[0]['generated_text']
    return text, generated_text
def youtube_link(url):
    """Download the audio of a YouTube video and return the path of the mp4 file.

    Args:
        url: Address of the YouTube video.

    Returns:
        Whatever ``download()`` returns for the first audio-only mp4 stream
        (used by the caller as the local file path).
    """
    video = YouTube(url)
    audio_streams = video.streams.filter(only_audio=True, file_extension='mp4')
    first_stream = audio_streams.first()
    return first_stream.download()
def convert_to_wav(path):
    """Write the audio at ``path`` to ``segment.wav`` and return that file name.

    The clip is cut with ``subclip(0, -2)`` before writing.
    NOTE(review): a negative end time presumably counts back from the end of
    the clip (i.e. the last two seconds are dropped) - confirm against the
    moviepy documentation.

    Args:
        path: Path of the source audio/video file readable by moviepy.

    Returns:
        The string ``"segment.wav"``, relative to the current working
        directory. The same file name is reused on every call.
    """
    wav_path = "segment.wav"
    sound = AudioFileClip(path)
    try:
        segment = sound.subclip(0, -2)
        segment.write_audiofile(wav_path)
    finally:
        # Release the underlying reader even if cutting or writing fails.
        sound.close()
    return wav_path
def youtube_transcribe(url):
    """Transcribe the audio track of a YouTube video.

    Downloads the audio with ``youtube_link``, converts it with
    ``convert_to_wav``, loads it through a one-row ``Dataset`` whose
    ``audio`` column is cast to 16 kHz, and runs ``pipe1`` on it.

    Args:
        url: Address of the YouTube video.

    Returns:
        The ``"text"`` field of the first (only) transcription result.
    """
    path = youtube_link(url)
    path_wav = convert_to_wav(path)
    # Feed the converted WAV to the dataset; previously the untouched MP4
    # path was used and the conversion result was silently ignored.
    audio_dataset = Dataset.from_dict({"audio": [path_wav]}).cast_column(
        "audio", Audio(sampling_rate=16000)
    )
    text = pipe1(audio_dataset["audio"])
    return text[0]["text"]
# Build the web UI: three tabs (file upload, microphone, YouTube URL), each
# with its own input, submit button and output text boxes.
with gr.Blocks() as demo:
    gr.Markdown("Whisper Small Swedish + Swedish GPT")
    gr.Markdown("Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model & text generation with Swedish GPT.")

    # Tab 1: transcribe an uploaded audio file.
    with gr.TabItem("Upload from disk"):
        upload_file = gr.Audio(
            source="upload",
            type="filepath",
            label="Upload from disk",
        )
        upload_button = gr.Button("Submit for recognition")
        upload_outputs = [
            gr.Textbox(label="Recognized speech from uploaded file"),
            gr.Textbox(label="Swedish-gpt generated speech from uploaded file"),
        ]

    # Tab 2: transcribe a microphone recording.
    with gr.TabItem("Record from microphone"):
        record_file = gr.Audio(
            source="microphone",
            type="filepath",
            label="Record from microphone",
        )
        record_button = gr.Button("Submit for recognition")
        record_outputs = [
            gr.Textbox(label="Recognized speech from recordings"),
            gr.Textbox(label="Swedish-gpt generated speech from recordings"),
        ]

    # Tab 3: transcribe the audio of a YouTube video.
    with gr.TabItem("Transcribe from Youtube URL"):
        url = gr.Text(max_lines=1, label="Transcribe from YouTube URL")
        youtube_button = gr.Button("Submit for recognition")
        youtube_outputs = [
            gr.Textbox(label="Recognized speech from URL"),
        ]

    # Wire each submit button to its handler.
    upload_button.click(fn=transcribe, inputs=upload_file, outputs=upload_outputs)
    record_button.click(fn=transcribe, inputs=record_file, outputs=record_outputs)
    youtube_button.click(fn=youtube_transcribe, inputs=url, outputs=youtube_outputs)

demo.launch()