Spaces:

Neprox
/

STT-Swedish

Runtime error

App Files Files Community

Neprox committed on Dec 4, 2022

Commit

13b3459

•

1 Parent(s): a3c12f3

Add documentation

Browse files

Files changed (1) hide show

app.py +21 -0

app.py CHANGED Viewed

@@ -8,16 +8,27 @@ from moviepy.editor import AudioFileClip
 # Shared pipeline object created from the "Neprox/model" checkpoint.
 # NOTE(review): presumably the Hugging Face transformers `pipeline` factory - confirm against the imports.
 pipe = pipeline(model="Neprox/model")
 def download_from_youtube(url):
     """
     Downloads the first audio-only mp4 stream of the given YouTube URL.
     :param url: URL of the YouTube video.
     :return: Path of the downloaded audio file.
     :raises ValueError: If the video offers no audio-only mp4 stream.
     """
     streams = YouTube(url).streams.filter(only_audio=True, file_extension='mp4')
     stream = streams.first()
     # first() yields None when the filter matched nothing; fail with a clear
     # message instead of an AttributeError on None.
     if stream is None:
         raise ValueError(f"No audio-only mp4 stream found for {url!r}")
     return stream.download()
 def get_timestamp(seconds):
     """
     Formats a duration given in seconds as a zero-padded MM:SS string.
     Minutes are not wrapped at 60 and fractional seconds are truncated.
     """
     whole_minutes = int(seconds / 60)
     leftover_seconds = int(seconds % 60)
     return "{:02d}:{:02d}".format(whole_minutes, leftover_seconds)
 def divide_into_30s_segments(audio_fpath, seconds_max):
     if not os.path.exists("segmented_audios"):
         os.makedirs("segmented_audios")
@@ -55,10 +66,20 @@ def divide_into_30s_segments(audio_fpath, seconds_max):
     return segment_paths, segment_start_times
 def get_translation(text):
     """
     Stub for the Swedish-to-English translation step.
     The input is currently ignored and the literal string "..." is returned.
     """
     # TODO: Make API call to Google Translate to get English translation
     placeholder = "..."
     return placeholder
 def transcribe(audio, url, seconds_max):
     if url:
         fpath = download_from_youtube(url)
         segment_paths, segment_start_times = divide_into_30s_segments(fpath, seconds_max)

 # Shared pipeline object created from the "Neprox/model" checkpoint.
 # NOTE(review): presumably the Hugging Face transformers `pipeline` factory - confirm against the imports.
 pipe = pipeline(model="Neprox/model")
 def download_from_youtube(url):
+    """
+    Downloads the first audio-only mp4 stream of the given YouTube URL.
+    :param url: URL of the YouTube video.
+    :return: Path of the downloaded file, as returned by the stream's download() call.
+    """
     streams = YouTube(url).streams.filter(only_audio=True, file_extension='mp4')
+    # NOTE(review): first() presumably returns None when no stream matches the filter,
+    # which would raise AttributeError on the next line - confirm.
     fpath = streams.first().download()
     return fpath
 def get_timestamp(seconds):
+    """
+    Creates %M:%S timestamp from seconds.
+    Minutes are not wrapped at 60, so 3600 seconds yields "60:00".
+    :param seconds: Number of seconds; fractional parts are truncated.
+    :return: Timestamp string with minutes and seconds each zero-padded to two digits.
+    """
     minutes = int(seconds / 60)
     seconds = int(seconds % 60)
     return f"{str(minutes).zfill(2)}:{str(seconds).zfill(2)}"
 def divide_into_30s_segments(audio_fpath, seconds_max):
+    """
+    Divides the audio file into 30s segments and returns the paths to the segments and the start times of the segments.
+    :param audio_fpath: Path to the audio file.
+    :param seconds_max: Maximum number of seconds to consider. If the audio file is longer than this, it will be truncated.
+    """
     if not os.path.exists("segmented_audios"):
         os.makedirs("segmented_audios")
     return segment_paths, segment_start_times
 def get_translation(text):
+    """
+    Placeholder for translating the given Swedish text to English.
+    Not implemented yet: the input is ignored and the literal string "..." is returned.
+    :param text: Text to translate (currently unused).
+    """
     # TODO: Make API call to Google Translate to get English translation
     return "..."
 def transcribe(audio, url, seconds_max):
+    """
+    Transcribes a YouTube video if a url is specified and returns the transcription.
+    If no url is specified, it transcribes the audio file as passed by Gradio.
+    :param audio: Audio file as passed by Gradio. Only used if no url is specified.
+    :param url: YouTube URL to transcribe.
+    :param seconds_max: Maximum number of seconds to consider. If the audio file is longer than this, it will be truncated.
+    """
     if url:
         fpath = download_from_youtube(url)
         segment_paths, segment_start_times = divide_into_30s_segments(fpath, seconds_max)