Spaces:
Runtime error
Runtime error
File size: 3,007 Bytes
94cbd93 ffde417 94cbd93 cad9f2f fb79c03 c1c59f8 ffde417 cad9f2f ffde417 c1c59f8 143dc39 51423ee 143dc39 40da39c c1c59f8 98a9509 c1c59f8 40da39c 8f47d53 c1c59f8 40da39c 8f1f85c c1c59f8 98a9509 c1c59f8 40da39c cad9f2f c1c59f8 40da39c c1c59f8 143dc39 c1c59f8 8f47d53 143dc39 cad9f2f ffde417 cad9f2f c1c59f8 4bedefc 40da39c cad9f2f ffde417 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import os
import gradio as gr
from transformers import pipeline
from pytube import YouTube
from datasets import Dataset, Audio
from moviepy.editor import AudioFileClip
# Module-level pipeline built once from the "Neprox/model" checkpoint.
# `transcribe` calls it on audio inputs and reads the "text" field of each result.
pipe = pipeline(model="Neprox/model")
def download_from_youtube(url):
    """Download the audio track of a YouTube video and return its local path.

    Selects the first audio-only MP4 stream listed for ``url``. ``download()``
    is called without arguments, so the destination directory and file name
    are whatever pytube chooses by default.

    Args:
        url: Address of the YouTube video.

    Returns:
        The file path returned by the stream's ``download()`` call.

    Raises:
        ValueError: If the video offers no audio-only MP4 stream.
    """
    audio_streams = YouTube(url).streams.filter(only_audio=True, file_extension="mp4")
    stream = audio_streams.first()
    # `first()` gives None when the filter matched nothing; fail with a clear
    # message instead of an AttributeError on `None.download()`.
    if stream is None:
        raise ValueError(f"No audio-only MP4 stream available for {url!r}")
    return stream.download()
def get_timestamp(seconds):
    """Render a duration given in seconds as an ``MM:SS`` string.

    Each field is zero-padded to at least two digits. Minutes are not folded
    into hours (3600 seconds renders as ``60:00``), and any fractional part
    of the seconds is truncated.
    """
    whole_minutes = int(seconds / 60)
    leftover_seconds = int(seconds % 60)
    return "{:02d}:{:02d}".format(whole_minutes, leftover_seconds)
def divide_into_30s_segments(audio_fpath, seconds_max):
    """Split an audio file into consecutive 30-second WAV segments.

    Segments are written to ``segmented_audios/segment_<i>.wav`` relative to
    the current working directory; the same file names are reused on every
    call. At most ``int(seconds_max / 30)`` segments are produced, and a
    trailing partial segment is kept only if it is longer than two seconds.

    Args:
        audio_fpath: Path of the audio file to split.
        seconds_max: Upper bound, in seconds, on how much audio to segment.

    Returns:
        A ``(segment_paths, segment_start_times)`` tuple of equal-length
        lists: the path of each written segment and its start offset, in
        seconds, within the source audio.
    """
    segment_length = 30
    output_dir = "segmented_audios"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    segment_paths = []
    segment_start_times = []

    sound = AudioFileClip(audio_fpath)
    try:
        n_full_segments = int(sound.duration / segment_length)
        len_last_segment = sound.duration % segment_length
        max_segments = int(seconds_max / segment_length)
        if n_full_segments > max_segments:
            # The cap cuts the audio on a segment boundary, so there is no
            # partial tail left to consider.
            n_full_segments = max_segments
            len_last_segment = 0

        # Index `n_full_segments`, when reached, is the trailing partial segment.
        for i in range(min(n_full_segments + 1, max_segments)):
            start = i * segment_length
            if i == n_full_segments:
                # Skip the last segment if it is not longer than two seconds.
                if not len_last_segment > 2:
                    continue
                end = start + len_last_segment
            else:
                end = start + segment_length

            segment_path = os.path.join(output_dir, f"segment_{i}.wav")
            sound.subclip(start, end).write_audiofile(segment_path)
            segment_paths.append(segment_path)
            segment_start_times.append(start)
    finally:
        # Release the clip's underlying reader even if writing a segment fails.
        sound.close()

    return segment_paths, segment_start_times
def transcribe(audio, url, seconds_max):
    """Transcribe a YouTube video's audio or a recorded audio file.

    A non-empty ``url`` takes precedence: its audio is downloaded, split into
    30-second segments (limited by ``seconds_max``), and every segment is
    transcribed. The result then contains one header line per segment with
    its 1-based number, the segment count and its start time, followed by
    the segment's text. Without a URL, ``audio`` is transcribed in one pass.

    Args:
        audio: Recorded audio passed straight to the pipeline; ignored when
            ``url`` is given.
        url: YouTube video address, or an empty value to use ``audio``.
        seconds_max: Maximum number of seconds of the video to transcribe.

    Returns:
        The transcription text. An empty string if the video yielded no
        segment long enough to transcribe.

    Raises:
        ValueError: If neither ``url`` nor ``audio`` is provided.
    """
    if not url:
        if not audio:
            # Fail early with a clear message rather than handing None to the pipeline.
            raise ValueError("Provide a YouTube URL or record audio to transcribe.")
        return pipe(audio)["text"]

    fpath = download_from_youtube(url)
    segment_paths, segment_start_times = divide_into_30s_segments(fpath, seconds_max)
    if not segment_paths:
        # No segment was written (e.g. audio of two seconds or less):
        # there is nothing to feed the pipeline.
        return ""

    audio_dataset = Dataset.from_dict({"audio": segment_paths}).cast_column(
        "audio", Audio(sampling_rate=16000)
    )
    pred = pipe(audio_dataset["audio"])

    n_segments = len(segment_start_times)
    # Number segments from 1 so the last header reads "n/n" rather than "n-1/n".
    return "".join(
        f"[Segment {number}/{n_segments}, start time {get_timestamp(seconds)}]\n{output['text']}\n"
        for number, (seconds, output) in enumerate(zip(segment_start_times, pred), start=1)
    )
# Input widgets, created in the positional order of `transcribe(audio, url, seconds_max)`.
microphone_input = gr.Audio(source="microphone", type="filepath")
url_input = gr.Text(
    max_lines=1,
    placeholder="Enter YouTube Link with Swedish speech to be transcribed",
    label="YouTube URL",
)
duration_input = gr.Slider(
    minimum=30,
    maximum=300,
    value=30,
    step=30,
    label="Number of seconds to transcribe",
)

# Wire the widgets to `transcribe` and start serving the demo.
iface = gr.Interface(
    fn=transcribe,
    inputs=[microphone_input, url_input, duration_input],
    outputs="text",
    title="Whisper Small Swedish",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",
)
iface.launch()
|