Neprox committed on
Commit
c1c59f8
1 Parent(s): cad9f2f

Add test version of YouTube processing functionality

Browse files
Files changed (1) hide show
  1. app.py +45 -13
app.py CHANGED
@@ -2,23 +2,55 @@ from transformers import pipeline
2
  import gradio as gr
3
  from pytube import YouTube
4
  from transformers import Dataset, Audio
 
5
 
6
  pipe = pipeline(model="Neprox/model")
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  def transcribe(audio, url):
9
  if url:
10
- # Download YouTube video
11
- streams = YouTube(url).streams.filter(only_audio=True, file_extension='mp4')
12
- audio_fpath = streams.first().download()
13
-
14
- # TODO:
15
- # Process up to 10 minutes by segmenting into 30 second blocks
16
- # Use pyMovie for selecting time ranges
17
- # query every block individually
18
- # Annotate text with timestamps
19
-
20
- audio_dataset = Dataset.from_dict({"audio": [audio_fpath]}).cast_column("audio", Audio())
21
- text = pipe(audio_dataset[0]["audio"])
22
  return text
23
 
24
  else:
@@ -28,7 +60,7 @@ def transcribe(audio, url):
28
  iface = gr.Interface(
29
  fn=transcribe,
30
  inputs=[
31
- gr.Audio(source="microphone", type="filepath")
32
  gr.Text(max_lines=1, placeholder="Enter YouTube Link with Swedish speech to be transcribed")
33
  ],
34
  outputs="text",
import os

import gradio as gr
# NOTE(review): Dataset and Audio are provided by the `datasets` package,
# not `transformers` — importing them from transformers raises ImportError.
from datasets import Dataset, Audio
from moviepy.editor import AudioFileClip
from pytube import YouTube
6
 
7
  pipe = pipeline(model="Neprox/model")
8
 
9
def download_from_youtube(url):
    """Download the audio track of a YouTube video to the working directory.

    Parameters
    ----------
    url : str
        Link to the YouTube video.

    Returns
    -------
    str
        Local file path of the downloaded audio-only MP4 stream.

    Raises
    ------
    ValueError
        If the video exposes no audio-only MP4 stream.
    """
    streams = YouTube(url).streams.filter(only_audio=True, file_extension='mp4')
    stream = streams.first()
    # The original chained .first().download(); .first() returns None when no
    # matching stream exists, which raised an opaque AttributeError.
    if stream is None:
        raise ValueError(f"No audio-only mp4 stream found for {url}")
    return stream.download()
13
+
14
def create_30s_segments(fpath):
    """Split an audio file into consecutive 30-second WAV segments.

    Segment files are written to the ``segmented_audios`` directory
    (created if missing). A trailing remainder shorter than two seconds
    is dropped, as it is too short to transcribe meaningfully.

    Parameters
    ----------
    fpath : str
        Path to the source audio file.

    Returns
    -------
    list[str]
        Paths of the written segment files, in playback order.
    """
    if not os.path.exists("segmented_audios"):
        os.makedirs("segmented_audios")

    sound = AudioFileClip(fpath)
    n_full_segments = int(sound.duration / 30)
    len_last_segment = sound.duration % 30

    segment_paths = []

    for i in range(n_full_segments + 1):
        is_last_segment = i == n_full_segments

        # Skip the trailing remainder if it is shorter than two seconds.
        if is_last_segment and not len_last_segment > 2:
            continue

        # BUG FIX: `start` must be computed before `end` is derived from it;
        # the original read `start` from the previous loop iteration, which
        # produced end < start for the final segment.
        start = i * 30
        end = start + len_last_segment if is_last_segment else (i + 1) * 30

        segment_path = os.path.join("segmented_audios", f"segment_{i}.wav")
        segment = sound.subclip(start, end)
        segment.write_audiofile(segment_path)
        segment_paths.append(segment_path)

    # BUG FIX: the original never returned the segment paths, so the caller
    # (transcribe) received None and the Dataset construction failed.
    return segment_paths
44
  def transcribe(audio, url):
45
  if url:
46
+ fpath = download_from_youtube(url)
47
+ audio_segment_paths = create_30s_segments(fpath)
48
+
49
+ audio_dataset = Dataset.from_dict({"audio": audio_segment_paths}).cast_column("audio", Audio())
50
+ print(audio_dataset)
51
+ text = pipe(audio_dataset)
52
+ print(type(text))
53
+ print(text)
 
 
 
 
54
  return text
55
 
56
  else:
 
60
  iface = gr.Interface(
61
  fn=transcribe,
62
  inputs=[
63
+ gr.Audio(source="microphone", type="filepath"),
64
  gr.Text(max_lines=1, placeholder="Enter YouTube Link with Swedish speech to be transcribed")
65
  ],
66
  outputs="text",