# NOTE: the lines below are residue from the web page this file was copied from
# (Space status, file size, commit hashes, line-number gutter); they are not code.
# Spaces:
# Runtime error
# Runtime error
# File size: 2,598 Bytes
# abaf86c 20baefb 9d22070 abaf86c 7069d5c 9417b92 abaf86c 7069d5c f49454a b23d0f5 7069d5c 20baefb cdb9ddf 20baefb 5eb56a0 20baefb b152137 20baefb 9074304 20baefb b152137 20baefb b152137
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
from transformers import pipeline
import gradio as gr
from pytube import YouTube
from datasets import Dataset, Audio
# Speech-recognition pipeline; the task is left for transformers to infer
# from the model id.
# Change the model id to "your-username/the-name-you-picked" to use your own.
pipe1 = pipeline(model="khalidey/ID2223_Lab2_Whisper_SV")

# Text-generation pipeline used to continue the recognized text.
pipe2 = pipeline(task="text-generation", model="birgermoell/swedish-gpt")
def transcribe(audio):
    """Transcribe an audio file and generate a text continuation of it.

    Args:
        audio: Path of the audio file to transcribe, or None / empty when
            the user submitted without providing any audio.

    Returns:
        A ``(text, generated_text)`` tuple: the recognized speech and the
        text produced by the text-generation pipeline from that speech.
        Both are empty strings when no audio was provided.
    """
    # Submitting without an upload/recording hands us no file; return blank
    # outputs instead of letting the speech pipeline fail on a missing input.
    if not audio:
        return "", ""

    text = pipe1(audio)["text"]
    # NOTE(review): per the transformers generation docs, max_length appears to
    # bound prompt + generated tokens, so a long transcription may leave little
    # room for new text; also only the first of the two sequences is used.
    candidates = pipe2(text, max_length=50, num_return_sequences=2)
    generated_text = candidates[0]['generated_text']
    return text, generated_text
def youtube_link(url):
    """Download the audio of a YouTube video and return the mp4 file path.

    Args:
        url: Address of the YouTube video.

    Returns:
        The path returned by the stream's ``download()`` call.

    Raises:
        ValueError: If the video offers no audio-only mp4 stream.
    """
    audio_streams = YouTube(url).streams.filter(only_audio=True, file_extension='mp4')
    stream = audio_streams.first()
    # first() gives None for an empty query; report that clearly rather than
    # failing with an AttributeError on None.download().
    if stream is None:
        raise ValueError(f"No audio-only mp4 stream found for {url!r}")
    return stream.download()
def youtube_transcribe(url):
    """Transcribe the speech of a YouTube video.

    Args:
        url: Address of the YouTube video.

    Returns:
        The recognized text.
    """
    path = youtube_link(url)
    # Hand the downloaded file path straight to the speech pipeline, the same
    # way transcribe() does. The previous code built a dataset from list(path),
    # which splits the path string into single characters, and then indexed
    # the pipeline's list result with "text".
    # NOTE(review): the pipeline decodes file paths via ffmpeg per the
    # transformers docs — confirm ffmpeg is available where this runs.
    return pipe1(path)["text"]
# Build the UI: one tab per audio source (uploaded file, microphone, YouTube URL).
with gr.Blocks() as demo:
    gr.Markdown("Whisper Small Swedish + Swedish GPT")
    gr.Markdown("Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model & text generation with Swedish GPT.")

    with gr.TabItem("Upload from disk"):
        upload_file = gr.Audio(source="upload", type="filepath", label="Upload from disk")
        upload_button = gr.Button("Submit for recognition")
        upload_outputs = [
            gr.Textbox(label="Recognized speech from uploaded file"),
            gr.Textbox(label="Swedish-gpt generated speech from uploaded file"),
        ]

    with gr.TabItem("Record from microphone"):
        record_file = gr.Audio(source="microphone", type="filepath", label="Record from microphone")
        record_button = gr.Button("Submit for recognition")
        record_outputs = [
            gr.Textbox(label="Recognized speech from recordings"),
            gr.Textbox(label="Swedish-gpt generated speech from recordings"),
        ]

    with gr.TabItem("Transcribe from Youtube URL"):
        url = gr.Text(max_lines=1, label="Transcribe from YouTube URL")
        youtube_button = gr.Button("Submit for recognition")
        youtube_outputs = [
            gr.Textbox(label="Recognized speech from URL"),
        ]

    # Event listeners are registered while the Blocks context is still open.
    # transcribe returns two values (two textboxes); youtube_transcribe one.
    upload_button.click(
        fn=transcribe,
        inputs=upload_file,
        outputs=upload_outputs,
    )
    record_button.click(
        fn=transcribe,
        inputs=record_file,
        outputs=record_outputs,
    )
    youtube_button.click(
        fn=youtube_transcribe,
        inputs=url,
        outputs=youtube_outputs,
    )

# The stray trailing "|" after this call was not valid Python and is removed.
demo.launch()