Spaces:

Maxkillor
/

video-transcription-and-subtitling-1.0

Sleeping

App Files Files Community

Maxkillor committed on Nov 7

Commit

70a6679

•

1 Parent(s): 8394486

Upload 2 files

Browse files

Files changed (2) hide show

app.py +153 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,153 @@

+import whisper
+import os
+import datetime
+import srt
+from moviepy.editor import VideoFileClip
+import gradio as gr
+import tempfile
+# Whisper checkpoints offered in the UI. Each one is loaded a single time at
+# import, so generate_output() only has to look the model up by size.
+model_sizes = ['tiny', 'base', 'small']
+models = dict(zip(model_sizes, map(whisper.load_model, model_sizes)))
+# Tasks selectable in the UI; the chosen value is forwarded to model.transcribe()
+tasks = ['transcribe', 'translate']
+# Output format labels available for each task
+output_formats = dict(
+    transcribe=['Transcription (.txt)', 'Subtitles (.srt)'],
+    translate=['Translation (.txt)', 'Translated Subtitles (.srt)'],
+)
+# Source-language choices; 'Auto-detect' is passed to Whisper as language=None
+languages = ['Auto-detect'] + ['en', 'zh', 'fr', 'es', 'de', 'ja', 'ko']
+def is_video_file(file_path):
+    """Return True when *file_path* ends in a recognised video extension.
+
+    Only the final suffix is inspected, case-insensitively; the accepted
+    suffixes are ``.mp4``, ``.avi``, ``.mov`` and ``.mkv``.
+    """
+    _, suffix = os.path.splitext(file_path)
+    return suffix.lower() in ('.mp4', '.avi', '.mov', '.mkv')
+def extract_audio_from_video(video_path):
+    """Extract the audio track of a video into an MP3 file next to it.
+
+    Args:
+        video_path: Path to the source video file.
+
+    Returns:
+        Path of the written ``.mp3`` file (the video path with its extension
+        replaced by ``.mp3``).
+
+    Raises:
+        ValueError: If the video has no audio track.
+    """
+    # splitext only touches the file name's suffix, so a dot inside a
+    # directory name can never be mistaken for the extension separator.
+    audio_path = os.path.splitext(video_path)[0] + '.mp3'
+    video = VideoFileClip(video_path)
+    try:
+        if video.audio is None:
+            raise ValueError(f"No audio track found in {video_path!r}")
+        video.audio.write_audiofile(audio_path, codec='mp3')
+    finally:
+        # Always release the readers the clip keeps open, even on failure
+        video.close()
+    return audio_path
+def generate_output(file_obj, model_size, task, output_format, language):
+    """Transcribe or translate an uploaded media file and write the result.
+
+    Args:
+        file_obj: Value of the ``gr.File`` input: a temp-file wrapper whose
+            ``name`` is the path of the uploaded copy, or a plain path string.
+        model_size: Key into the pre-loaded ``models`` dict.
+        task: ``'transcribe'`` or ``'translate'``, forwarded to Whisper.
+        output_format: Selected format label; labels containing
+            ``'Subtitles'`` produce an ``.srt`` file, all others a ``.txt``.
+        language: Language code, or ``"Auto-detect"`` to pass ``None``.
+
+    Returns:
+        Path of the generated ``.srt`` or ``.txt`` file.
+
+    Raises:
+        gr.Error: If no file was uploaded.
+    """
+    import shutil
+
+    if file_obj is None:
+        raise gr.Error("Please upload a video or audio file first.")
+    # The wrapper's ``name`` is an absolute path, so joining it onto another
+    # directory returns it unchanged, and opening that for writing would
+    # truncate the upload. Copy from the path instead of reading the handle.
+    upload_path = file_obj if isinstance(file_obj, str) else file_obj.name
+    original_name = getattr(file_obj, 'orig_name', None) or upload_path
+    base_filename = os.path.splitext(os.path.basename(original_name))[0]
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        # Work on a private copy so intermediates (e.g. the extracted audio)
+        # are removed together with the temporary directory.
+        file_name = os.path.join(tmpdirname, os.path.basename(upload_path))
+        shutil.copyfile(upload_path, file_name)
+        # If it's a video file, extract the audio
+        if is_video_file(file_name):
+            audio_path = extract_audio_from_video(file_name)
+        else:
+            audio_path = file_name
+        # Select the pre-loaded model
+        model = models[model_size]
+        # Transcribe or translate the audio
+        result = model.transcribe(
+            audio_path,
+            task=task,
+            language=None if language == "Auto-detect" else language
+        )
+    segments = result['segments']
+    # The output must outlive this call, because the returned path is only
+    # read after the function returns; it therefore goes into its own
+    # directory rather than the working directory removed above.
+    output_dir = tempfile.mkdtemp()
+    if 'Subtitles' in output_format:
+        # Segment text usually carries a leading space; strip it per cue
+        subtitles = [
+            srt.Subtitle(
+                index=index,
+                start=datetime.timedelta(seconds=segment['start']),
+                end=datetime.timedelta(seconds=segment['end']),
+                content=segment['text'].strip(),
+            )
+            for index, segment in enumerate(segments, start=1)
+        ]
+        content = srt.compose(subtitles)
+        extension = '.srt'
+    else:
+        # Strip before joining so the text is separated by single spaces
+        pieces = (segment['text'].strip() for segment in segments)
+        content = " ".join(piece for piece in pieces if piece)
+        extension = '.txt'
+    output_file = os.path.join(output_dir, base_filename + extension)
+    with open(output_file, "w", encoding='utf-8') as file:
+        file.write(content)
+    return output_file
+def update_output_format(task):
+    """Build the dropdown update listing the output formats for *task*.
+
+    The first format registered for the task becomes the selected value.
+    """
+    choices = output_formats[task]
+    default_choice = choices[0]
+    return gr.Dropdown.update(choices=choices, value=default_choice)
+# Build the UI: one file input, four option dropdowns, a generate button and
+# a download slot for the produced transcript/subtitle file.
+with gr.Blocks() as demo:
+    gr.Markdown("# 📼 Video Transcription and Subtitles Generator")
+    gr.Markdown("Upload a video or audio file to get the transcription or subtitles.")
+    with gr.Row():
+        file_input = gr.File(
+            label="Upload Video or Audio File",
+            file_types=['video', 'audio']
+        )
+    with gr.Row():
+        model_size_input = gr.Dropdown(
+            label="Select Whisper Model Size",
+            choices=model_sizes,
+            value='small'
+        )
+        task_input = gr.Dropdown(
+            label="Select Task",
+            choices=tasks,
+            value='transcribe'
+        )
+        output_format_input = gr.Dropdown(
+            label="Select Output Format",
+            choices=output_formats['transcribe'],
+            value=output_formats['transcribe'][0]
+        )
+        language_input = gr.Dropdown(
+            label="Select Original Language (Optional)",
+            choices=languages,
+            value='Auto-detect'
+        )
+    # Keep the format dropdown in sync with the selected task
+    task_input.change(
+        fn=update_output_format,
+        inputs=task_input,
+        outputs=output_format_input
+    )
+    submit_button = gr.Button("Generate")
+    output_file = gr.File(label="Download Output File")
+    submit_button.click(
+        fn=generate_output,
+        inputs=[
+            file_input,
+            model_size_input,
+            task_input,
+            output_format_input,
+            language_input
+        ],
+        outputs=output_file
+    )
+    # Toggle between light and dark mode.
+    # Reassigning demo.theme from a Python handler cannot restyle a page that
+    # has already been built, so the switch is done in the browser instead by
+    # flipping the 'dark' CSS class. No Python callback or outputs are needed.
+    theme_button = gr.Button("Toggle Theme")
+    theme_button.click(
+        fn=None,
+        inputs=None,
+        outputs=None,
+        _js="() => { document.body.classList.toggle('dark'); }"
+    )
+# Only start the server when executed as a script, not when imported
+if __name__ == "__main__":
+    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+openai-whisper==20230314
+moviepy==1.0.3
+srt==3.5.2
+gradio==3.41.2
+ffmpeg-python==0.2.0