Spaces:
Runtime error
Runtime error
File size: 2,863 Bytes
abaf86c 20baefb 9d22070 d3e2fa4 abaf86c 7069d5c 9417b92 abaf86c 7069d5c f49454a b23d0f5 7069d5c 20baefb d3e2fa4 9aa5fc9 d3e2fa4 9aa5fc9 d3e2fa4 9aa5fc9 d3e2fa4 20baefb c521533 20baefb 9aa5fc9 5ffa47f 5eb56a0 5ffa47f 20baefb b152137 20baefb 9074304 20baefb b152137 20baefb b152137 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
from transformers import pipeline
import gradio as gr
from pytube import YouTube
from datasets import Dataset, Audio
import os
from moviepy.editor import AudioFileClip
# Speech recognition pipeline. No task is passed, so `pipeline` resolves it
# from the model id.
pipe1 = pipeline(
    model="khalidey/ID2223_Lab2_Whisper_SV",
)  # change to "your-username/the-name-you-picked"

# Text-generation pipeline fed with the recognized text.
pipe2 = pipeline(
    task='text-generation',
    model='birgermoell/swedish-gpt',
)
def transcribe(audio):
    """Transcribe an audio file and extend the transcript with generated text.

    Args:
        audio: Path of the audio file to transcribe (the UI passes a file
            path). ``None`` or an empty string means no audio was supplied.

    Returns:
        A ``(text, generated_text)`` tuple: the recognized speech and the
        text produced by the generation pipeline when prompted with it.
        Both are empty strings when no audio was supplied.
    """
    # Submitting without an upload or recording delivers no path; return
    # empty outputs instead of failing inside the speech pipeline.
    if audio is None or (isinstance(audio, str) and not audio):
        return "", ""

    text = pipe1(audio)["text"]

    # Only one continuation is displayed, so only one is requested.
    # `max_new_tokens` limits the continuation itself, whereas `max_length`
    # also counts the prompt and leaves no room after a long transcript.
    generated = pipe2(text, max_new_tokens=50, num_return_sequences=1)
    return text, generated[0]["generated_text"]
def youtube_link(url):
    """Download the audio of a YouTube video and return the file path.

    Args:
        url: URL of the YouTube video.

    Returns:
        Path of the downloaded audio-only mp4 file.

    Raises:
        ValueError: If the video exposes no audio-only mp4 stream.
    """
    streams = YouTube(url).streams.filter(only_audio=True, file_extension='mp4')
    stream = streams.first()
    # An empty query yields None; report that explicitly rather than
    # failing with an AttributeError on `None.download()`.
    if stream is None:
        raise ValueError(f"No audio-only mp4 stream found for {url!r}")
    return stream.download()
def convert_to_wav(path):
    """Write the audio at ``path`` to ``audio.wav`` and return that name.

    The clip is cut with ``subclip(0, -2)`` before writing, i.e. the end
    time is given as a negative offset.

    Args:
        path: Path of the source audio file.

    Returns:
        The output file name, always ``"audio.wav"``.
    """
    audio = AudioFileClip(path)
    try:
        # NOTE(review): the negative end drops the tail of the clip,
        # presumably to avoid read errors at the end of the stream - confirm.
        audio_frame = audio.subclip(0, -2)
        audio_frame.write_audiofile("audio.wav")
    finally:
        # Release the reader held by the clip even if writing fails.
        audio.close()
    return "audio.wav"
def youtube_transcribe(url):
    """Transcribe the speech of a YouTube video.

    The audio is downloaded, converted to wav, loaded through a one-row
    dataset cast to a 16 kHz audio column, and passed to the speech
    pipeline.

    Args:
        url: URL of the YouTube video. ``None``, empty or blank input means
            no URL was supplied.

    Returns:
        The recognized text, or an empty string when no URL was supplied.
    """
    # Nothing to download for an empty text box.
    if not url or not url.strip():
        return ""

    path = youtube_link(url)
    try:
        path_wav = convert_to_wav(path)
    finally:
        # The download is only needed for the conversion; remove it so
        # files do not pile up across requests. Cleanup is best effort and
        # must not mask the conversion outcome.
        try:
            os.remove(path)
        except OSError:
            pass

    audio_dataset = Dataset.from_dict({"audio": [path_wav]}).cast_column(
        "audio", Audio(sampling_rate=16000)
    )
    text = pipe1(audio_dataset["audio"])
    return text[0]["text"]
# User interface: three tabs, each with its own input, submit button and
# output text boxes. Button callbacks are attached after the tabs are built.
with gr.Blocks() as demo:
    gr.Markdown("Whisper Small Swedish + Swedish GPT")
    gr.Markdown("Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model & text generation with Swedish GPT.")

    # Tab 1: audio file chosen from disk.
    with gr.TabItem("Upload from disk"):
        upload_file = gr.Audio(
            source="upload",
            type="filepath",
            label="Upload from disk",
        )
        upload_button = gr.Button("Submit for recognition")
        upload_outputs = [
            gr.Textbox(label="Recognized speech from uploaded file"),
            gr.Textbox(label="Swedish-gpt generated speech from uploaded file"),
        ]

    # Tab 2: audio recorded with the microphone.
    with gr.TabItem("Record from microphone"):
        record_file = gr.Audio(
            source="microphone",
            type="filepath",
            label="Record from microphone",
        )
        record_button = gr.Button("Submit for recognition")
        record_outputs = [
            gr.Textbox(label="Recognized speech from recordings"),
            gr.Textbox(label="Swedish-gpt generated speech from recordings"),
        ]

    # Tab 3: audio taken from a YouTube video.
    with gr.TabItem("Transcribe from Youtube URL"):
        url = gr.Text(max_lines=1, label="Transcribe from YouTube URL")
        youtube_button = gr.Button("Submit for recognition")
        youtube_outputs = [gr.Textbox(label="Recognized speech from URL")]

    # The two audio tabs share `transcribe`; the URL tab has its own handler.
    upload_button.click(transcribe, inputs=upload_file, outputs=upload_outputs)
    record_button.click(transcribe, inputs=record_file, outputs=record_outputs)
    youtube_button.click(youtube_transcribe, inputs=url, outputs=youtube_outputs)

demo.launch()