Neprox committed on
Commit
8f47d53
1 Parent(s): 3b7997e
Files changed (1) hide show
  1. app.py +6 -0
app.py CHANGED
@@ -6,6 +6,7 @@ from datasets import Dataset, Audio
6
  from moviepy.editor import AudioFileClip
7
 
8
  pipe = pipeline(model="Neprox/model")
 
9
 
10
  def download_from_youtube(url):
11
  streams = YouTube(url).streams.filter(only_audio=True, file_extension='mp4')
@@ -20,6 +21,10 @@ def divide_into_30s_segments(audio_fpath):
20
  n_full_segments = int(sound.duration / 30)
21
  len_last_segment = sound.duration % 30
22
 
 
 
 
 
23
  segment_paths = []
24
  segment_start_times = []
25
 
@@ -51,6 +56,7 @@ def transcribe(audio, url):
51
 
52
  audio_dataset = Dataset.from_dict({"audio": segment_paths}).cast_column("audio", Audio())
53
  print(audio_dataset)
 
54
  text = pipe(audio_dataset)
55
  print(type(text))
56
  print(text)
 
6
  from moviepy.editor import AudioFileClip
7
 
8
  pipe = pipeline(model="Neprox/model")
9
+ MAX_SEGMENTS = 10 # 5 minutes
10
 
11
  def download_from_youtube(url):
12
  streams = YouTube(url).streams.filter(only_audio=True, file_extension='mp4')
 
21
  n_full_segments = int(sound.duration / 30)
22
  len_last_segment = sound.duration % 30
23
 
24
+ if n_full_segments > MAX_SEGMENTS:
25
+ n_full_segments = MAX_SEGMENTS
26
+ len_last_segment = 0
27
+
28
  segment_paths = []
29
  segment_start_times = []
30
 
 
56
 
57
  audio_dataset = Dataset.from_dict({"audio": segment_paths}).cast_column("audio", Audio())
58
  print(audio_dataset)
59
+ print(audio_dataset[0])
60
  text = pipe(audio_dataset)
61
  print(type(text))
62
  print(text)