Spaces:

Neprox
/

STT-Swedish

Runtime error

App Files Files Community

Neprox committed on Dec 8, 2022

Commit

142a301

•

1 Parent(s): 13b3459

Add translation part

Browse files

Files changed (2) hide show

app.py +26 -13
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -5,7 +5,16 @@ from pytube import YouTube
 from datasets import Dataset, Audio
 from moviepy.editor import AudioFileClip
 pipe = pipeline(model="Neprox/model")
 def download_from_youtube(url):
     """
@@ -65,19 +74,21 @@ def divide_into_30s_segments(audio_fpath, seconds_max):
     return segment_paths, segment_start_times
-def get_translation(text):
     """
-    Translates the given Swedish text to English.
     """
-    # TODO: Make API call to Google Translate to get English translation
-    return "..."
-def transcribe(audio, url, seconds_max):
     """
-    Transcribes a YouTube video if a url is specified and returns the transcription.
-    If no url is specified, it transcribes the audio file as passed by Gradio.
     :param audio: Audio file as passed by Gradio. Only used if no url is specified.
-    :param url: YouTube URL to transcribe.
     :param seconds_max: Maximum number of seconds to consider. If the audio file is longer than this, it will be truncated.
     """
     if url:
@@ -91,7 +102,8 @@ def transcribe(audio, url, seconds_max):
         for i, (seconds, output) in enumerate(zip(segment_start_times, pred)):
             text += f"[Segment {i+1}/{n_segments}, start time {get_timestamp(seconds)}]\n"
             text += f"{output['text']}\n"
-            text += f"[Translation]\n{get_translation(output['text'])}\n\n"
         return text
     else:
@@ -99,11 +111,12 @@ def transcribe(audio, url, seconds_max):
     return text
 iface = gr.Interface(
-    fn=transcribe,
     inputs=[
-        gr.Audio(source="microphone", type="filepath", label="Transcribe from Microphone"),
-        gr.Text(max_lines=1, placeholder="Enter YouTube Link with Swedish speech to be transcribed", label="Transcribe from YouTube URL"),
-        gr.Slider(minimum=30, maximum=300, value=30, step=30, label="Number of seconds to transcribe from YouTube URL")
     ],
     outputs="text",
     title="Whisper Small Swedish",

 from datasets import Dataset, Audio
 from moviepy.editor import AudioFileClip
+import googletrans
+from googletrans import Translator
 pipe = pipeline(model="Neprox/model")
+translator = Translator()
+# Get languages available for translation
+languages = []
+for code, name in googletrans.LANGUAGES.items():
+    languages.append((code, name.capitalize()))
 def download_from_youtube(url):
     """
     return segment_paths, segment_start_times
+def get_translation(text, dest="en"):
     """
+    Translates the given Swedish text to the language specified.
     """
+    dest_text = dest[0]
+    result = translator.translate(text, dest_text, 'sv')
+    return result.text
+def translate(audio, url, seconds_max, dest_language):
     """
+    Translates a YouTube video if a url is specified and returns the transcription.
+    If no url is specified, it translates the audio file as passed by Gradio.
     :param audio: Audio file as passed by Gradio. Only used if no url is specified.
+    :param url: URL of the YouTube video to translate.
     :param seconds_max: Maximum number of seconds to consider. If the audio file is longer than this, it will be truncated.
     """
     if url:
         for i, (seconds, output) in enumerate(zip(segment_start_times, pred)):
             text += f"[Segment {i+1}/{n_segments}, start time {get_timestamp(seconds)}]\n"
             text += f"{output['text']}\n"
+            text += f"[Translation ({dest_language})]\n"
+            text += f"{get_translation(output['text'], dest_language)}\n\n"
         return text
     else:
     return text
 iface = gr.Interface(
+    fn=translate,
     inputs=[
+        gr.Audio(source="microphone", type="filepath", label="Translate from Microphone"),
+        gr.Text(max_lines=1, placeholder="Enter YouTube Link with Swedish speech to be translated", label="Translate from YouTube URL"),
+        gr.Slider(minimum=30, maximum=300, value=30, step=30, label="Number of seconds to translate from YouTube URL"),
+        gr.Dropdown(languages, label="Destination language")
     ],
     outputs="text",
     title="Whisper Small Swedish",

requirements.txt CHANGED Viewed

@@ -7,3 +7,4 @@ torch
 torchaudio
 moviepy
 git+https://github.com/pytube/pytube

 torchaudio
 moviepy
 git+https://github.com/pytube/pytube
+googletrans-py==4.0.0rc1