Spaces:
Runtime error
Runtime error
Add test version of youtube processing functionality
Browse files
app.py
CHANGED
@@ -2,23 +2,55 @@ from transformers import pipeline
|
|
2 |
import gradio as gr
|
3 |
from pytube import YouTube
|
4 |
from transformers import Dataset, Audio
|
|
|
5 |
|
6 |
pipe = pipeline(model="Neprox/model")
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
def transcribe(audio, url):
|
9 |
if url:
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
# Annotate text with timestamps
|
19 |
-
|
20 |
-
audio_dataset = Dataset.from_dict({"audio": [audio_fpath]}).cast_column("audio", Audio())
|
21 |
-
text = pipe(audio_dataset[0]["audio"])
|
22 |
return text
|
23 |
|
24 |
else:
|
@@ -28,7 +60,7 @@ def transcribe(audio, url):
|
|
28 |
iface = gr.Interface(
|
29 |
fn=transcribe,
|
30 |
inputs=[
|
31 |
-
gr.Audio(source="microphone", type="filepath")
|
32 |
gr.Text(max_lines=1, placeholder="Enter YouTube Link with Swedish speech to be transcribed")
|
33 |
],
|
34 |
outputs="text",
|
|
|
2 |
import gradio as gr
|
3 |
from pytube import YouTube
|
4 |
from transformers import Dataset, Audio
|
5 |
+
from moviepy.editor import AudioFileClip
|
6 |
|
7 |
pipe = pipeline(model="Neprox/model")
|
8 |
|
9 |
+
def download_from_youtube(url):
    """Download the first audio-only MP4 stream of a YouTube video.

    Args:
        url: URL of the YouTube video.

    Returns:
        The file path returned by the selected stream's ``download()`` call.

    Raises:
        ValueError: If the video exposes no audio-only MP4 stream.
    """
    streams = YouTube(url).streams.filter(only_audio=True, file_extension='mp4')
    stream = streams.first()
    if stream is None:
        # An empty query gives None from first(); fail with a clear message
        # instead of an AttributeError on None.download().
        raise ValueError(f"No audio-only mp4 stream found for {url!r}")
    return stream.download()
|
13 |
+
|
14 |
+
def create_30s_segments(fpath):
    """Split the audio file at *fpath* into consecutive 30-second WAV segments.

    Segments are written to the ``segmented_audios`` directory (created if
    missing) as ``segment_<i>.wav``; segment ``i`` starts at ``i * 30``
    seconds. A trailing remainder is written as a shorter final segment only
    when it is longer than two seconds.

    Args:
        fpath: Path of an audio file that ``AudioFileClip`` can open.

    Returns:
        List of the segment file paths written, in playback order.
    """
    # Function-scope import: the module's visible import block lacks `os`.
    import os

    segment_len = 30  # seconds per full segment
    min_last_len = 2  # a remainder of at most this many seconds is skipped
    out_dir = "segmented_audios"
    os.makedirs(out_dir, exist_ok=True)

    segment_paths = []
    sound = AudioFileClip(fpath)
    try:
        # divmod keeps the segment count and the remainder consistent.
        n_full, len_last_segment = divmod(sound.duration, segment_len)
        n_full_segments = int(n_full)

        for i in range(n_full_segments + 1):
            # Compute start first: the end of the last segment depends on it.
            start = i * segment_len
            if i < n_full_segments:
                end = start + segment_len
            elif len_last_segment > min_last_len:
                end = sound.duration
            else:
                # Remainder too short to be worth transcribing.
                break

            segment_path = os.path.join(out_dir, f"segment_{i}.wav")
            sound.subclip(start, end).write_audiofile(segment_path)
            segment_paths.append(segment_path)
    finally:
        # Release the reader held by the clip even if writing fails.
        sound.close()

    return segment_paths
|
42 |
+
|
43 |
+
|
44 |
def transcribe(audio, url):
|
45 |
if url:
|
46 |
+
fpath = download_from_youtube(url)
|
47 |
+
audio_segment_paths = create_30s_segments(fpath)
|
48 |
+
|
49 |
+
audio_dataset = Dataset.from_dict({"audio": audio_segment_paths}).cast_column("audio", Audio())
|
50 |
+
print(audio_dataset)
|
51 |
+
text = pipe(audio_dataset)
|
52 |
+
print(type(text))
|
53 |
+
print(text)
|
|
|
|
|
|
|
|
|
54 |
return text
|
55 |
|
56 |
else:
|
|
|
60 |
iface = gr.Interface(
|
61 |
fn=transcribe,
|
62 |
inputs=[
|
63 |
+
gr.Audio(source="microphone", type="filepath"),
|
64 |
gr.Text(max_lines=1, placeholder="Enter YouTube Link with Swedish speech to be transcribed")
|
65 |
],
|
66 |
outputs="text",
|