Neprox committed on
Commit
13b3459
1 Parent(s): a3c12f3

Add documentation

Browse files
Files changed (1) hide show
  1. app.py +21 -0
app.py CHANGED
@@ -8,16 +8,27 @@ from moviepy.editor import AudioFileClip
8
  pipe = pipeline(model="Neprox/model")
9
 
10
  def download_from_youtube(url):
 
 
 
11
  streams = YouTube(url).streams.filter(only_audio=True, file_extension='mp4')
12
  fpath = streams.first().download()
13
  return fpath
14
 
15
  def get_timestamp(seconds):
 
 
 
16
  minutes = int(seconds / 60)
17
  seconds = int(seconds % 60)
18
  return f"{str(minutes).zfill(2)}:{str(seconds).zfill(2)}"
19
 
20
  def divide_into_30s_segments(audio_fpath, seconds_max):
 
 
 
 
 
21
  if not os.path.exists("segmented_audios"):
22
  os.makedirs("segmented_audios")
23
 
@@ -55,10 +66,20 @@ def divide_into_30s_segments(audio_fpath, seconds_max):
55
  return segment_paths, segment_start_times
56
 
57
  def get_translation(text):
 
 
 
58
  # TODO: Make API call to Google Translate to get English translation
59
  return "..."
60
 
61
  def transcribe(audio, url, seconds_max):
 
 
 
 
 
 
 
62
  if url:
63
  fpath = download_from_youtube(url)
64
  segment_paths, segment_start_times = divide_into_30s_segments(fpath, seconds_max)
 
8
  pipe = pipeline(model="Neprox/model")
9
 
10
  def download_from_youtube(url):
11
+ """
12
+ Downloads the audio-only (mp4) stream of the video at the given YouTube URL and returns the path to the downloaded audio file.
13
+ """
14
  streams = YouTube(url).streams.filter(only_audio=True, file_extension='mp4')
15
  fpath = streams.first().download()
16
  return fpath
17
 
18
  def get_timestamp(seconds):
19
+ """
20
+ Creates a zero-padded %M:%S timestamp string from a number of seconds.
21
+ """
22
  minutes = int(seconds / 60)
23
  seconds = int(seconds % 60)
24
  return f"{str(minutes).zfill(2)}:{str(seconds).zfill(2)}"
25
 
26
  def divide_into_30s_segments(audio_fpath, seconds_max):
27
+ """
28
+ Divides the audio file into 30s segments and returns the paths to the segments and the start times of the segments.
29
+ :param audio_fpath: Path to the audio file.
30
+ :param seconds_max: Maximum number of seconds to consider. If the audio file is longer than this, it will be truncated.
31
+ """
32
  if not os.path.exists("segmented_audios"):
33
  os.makedirs("segmented_audios")
34
 
 
66
  return segment_paths, segment_start_times
67
 
68
  def get_translation(text):
69
+ """
70
+ Translates the given Swedish text to English (not implemented yet: currently returns the placeholder "...").
71
+ """
72
  # TODO: Make API call to Google Translate to get English translation
73
  return "..."
74
 
75
  def transcribe(audio, url, seconds_max):
76
+ """
77
+ Transcribes a YouTube video if a url is specified and returns the transcription.
78
+ If no url is specified, it transcribes the audio file as passed by Gradio.
79
+ :param audio: Audio file as passed by Gradio. Only used if no url is specified.
80
+ :param url: YouTube URL to transcribe.
81
+ :param seconds_max: Maximum number of seconds to consider. If the audio file is longer than this, it will be truncated.
82
+ """
83
  if url:
84
  fpath = download_from_youtube(url)
85
  segment_paths, segment_start_times = divide_into_30s_segments(fpath, seconds_max)