Upload 2 files
Browse files- app.py +153 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import whisper
|
2 |
+
import os
|
3 |
+
import datetime
|
4 |
+
import srt
|
5 |
+
from moviepy.editor import VideoFileClip
|
6 |
+
import gradio as gr
|
7 |
+
import tempfile
|
8 |
+
|
9 |
+
# Load the Whisper models once at startup so that requests only pay for
# inference, not for reading model weights.
model_sizes = ['tiny', 'base', 'small']
models = {size: whisper.load_model(size) for size in model_sizes}

# Task options (passed straight through to `model.transcribe(task=...)`).
tasks = ['transcribe', 'translate']

# Output format options, keyed by task. The first entry of each list is the
# default shown in the UI; entries containing "Subtitles" select SRT output.
output_formats = {
    'transcribe': ['Transcription (.txt)', 'Subtitles (.srt)'],
    'translate': ['Translation (.txt)', 'Translated Subtitles (.srt)'],
}

# Language options. 'Auto-detect' is a UI-only sentinel that is mapped to
# `language=None` before calling Whisper.
languages = ['Auto-detect', 'en', 'zh', 'fr', 'es', 'de', 'ja', 'ko']
|
24 |
+
|
25 |
+
def is_video_file(file_path):
    """Return True when *file_path* ends in a known video extension.

    The check is purely name-based and case-insensitive; the file is never
    opened.

    Args:
        file_path: Path (or bare file name) to inspect.

    Returns:
        bool: True for ``.mp4``, ``.avi``, ``.mov`` and ``.mkv`` files,
        False otherwise (including names without an extension).
    """
    video_extensions = {'.mp4', '.avi', '.mov', '.mkv'}
    ext = os.path.splitext(file_path)[-1].lower()
    return ext in video_extensions
|
29 |
+
|
30 |
+
def extract_audio_from_video(video_path):
    """Write the audio track of *video_path* to an MP3 file next to it.

    Args:
        video_path: Path of the video file to read.

    Returns:
        str: Path of the written audio file (same location and stem as the
        video, with a ``.mp3`` extension).

    Raises:
        ValueError: If the video has no audio track.
    """
    # splitext only looks at the final path component, so a dot in a
    # directory name cannot be mistaken for the extension separator.
    audio_path = os.path.splitext(video_path)[0] + '.mp3'
    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError(f"No audio track found in {video_path!r}")
        video.audio.write_audiofile(audio_path, codec='mp3')
    finally:
        # Release the clip's reader even when extraction fails.
        video.close()
    return audio_path
|
35 |
+
|
36 |
+
def _segments_to_srt(segments):
    """Render Whisper segments as the text of an SRT subtitle file."""
    subtitles = [
        srt.Subtitle(
            index=index,
            start=datetime.timedelta(seconds=segment['start']),
            end=datetime.timedelta(seconds=segment['end']),
            content=segment['text'].strip(),
        )
        for index, segment in enumerate(segments, start=1)
    ]
    return srt.compose(subtitles)


def _segments_to_text(segments):
    """Join the text of all Whisper segments into one space-separated string."""
    return " ".join(segment['text'].strip() for segment in segments)


def generate_output(file_obj, model_size, task, output_format, language):
    """Transcribe or translate an uploaded file and write the result to disk.

    Args:
        file_obj: The upload from the file input. Either a path string or an
            object whose ``.name`` attribute is the path of the uploaded file
            on disk (assumed from Gradio's File component -- confirm against
            the pinned Gradio version).
        model_size: Key into the pre-loaded ``models`` mapping.
        task: Whisper task, ``'transcribe'`` or ``'translate'``.
        output_format: Label chosen in the UI; any label containing
            ``'Subtitles'`` produces an ``.srt`` file, everything else a
            ``.txt`` file.
        language: Language code, or ``'Auto-detect'`` to let Whisper decide.

    Returns:
        str: Path of the generated ``.srt`` or ``.txt`` file.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if file_obj is None:
        raise gr.Error("Please upload a video or audio file first.")

    # Use the uploaded file where it already lives. Joining a temporary
    # directory with an absolute path returns the absolute path unchanged, so
    # re-writing the upload "into" a temp dir would truncate the upload itself.
    source_path = file_obj if isinstance(file_obj, str) else file_obj.name

    extracted_audio = None
    try:
        # If it's a video file, extract the audio
        if is_video_file(source_path):
            extracted_audio = extract_audio_from_video(source_path)
            audio_path = extracted_audio
        else:
            audio_path = source_path

        # Select the pre-loaded model
        model = models[model_size]

        # Transcribe or translate the audio
        result = model.transcribe(
            audio_path,
            task=task,
            language=None if language == "Auto-detect" else language,
        )
    finally:
        # The extracted audio is only an intermediate artifact.
        if extracted_audio is not None and os.path.exists(extracted_audio):
            os.remove(extracted_audio)

    # The output must outlive this function so it can still be downloaded;
    # a directory from mkdtemp() is not removed automatically.
    output_dir = tempfile.mkdtemp()
    stem = os.path.splitext(os.path.basename(source_path))[0]
    base_filename = os.path.join(output_dir, stem)

    # Prepare the output file
    if 'Subtitles' in output_format:
        output_file = base_filename + '.srt'
        content = _segments_to_srt(result['segments'])
    else:
        output_file = base_filename + '.txt'
        content = _segments_to_text(result['segments'])

    with open(output_file, "w", encoding='utf-8') as file:
        file.write(content)

    return output_file
|
85 |
+
|
86 |
+
def update_output_format(task):
    """Return a dropdown update listing the output formats for *task*.

    Args:
        task: A key of ``output_formats`` (``'transcribe'`` or
            ``'translate'``).

    Returns:
        A Gradio update that replaces the dropdown's choices with the formats
        for *task* and selects the first one.

    Raises:
        KeyError: If *task* is not a key of ``output_formats``.
    """
    choices = output_formats[task]
    # The generic gr.update() is used instead of gr.Dropdown.update(), which
    # is deprecated in later Gradio releases (confirm against pinned version).
    return gr.update(choices=choices, value=choices[0])
|
88 |
+
|
89 |
+
# Build the UI: one file input, four option dropdowns, a submit button and a
# downloadable result file.
with gr.Blocks() as demo:
    gr.Markdown("# 📼 Video Transcription and Subtitles Generator")
    gr.Markdown("Upload a video or audio file to get the transcription or subtitles.")

    with gr.Row():
        file_input = gr.File(
            label="Upload Video or Audio File",
            file_types=['video', 'audio']
        )

    with gr.Row():
        model_size_input = gr.Dropdown(
            label="Select Whisper Model Size",
            choices=model_sizes,
            value='small'
        )
        task_input = gr.Dropdown(
            label="Select Task",
            choices=tasks,
            value='transcribe'
        )
        output_format_input = gr.Dropdown(
            label="Select Output Format",
            choices=output_formats['transcribe'],
            value=output_formats['transcribe'][0]
        )
        language_input = gr.Dropdown(
            label="Select Original Language (Optional)",
            choices=languages,
            value='Auto-detect'
        )

    # Keep the output-format choices in sync with the selected task.
    task_input.change(
        fn=update_output_format,
        inputs=task_input,
        outputs=output_format_input
    )

    submit_button = gr.Button("Generate")
    output_file = gr.File(label="Download Output File")

    submit_button.click(
        fn=generate_output,
        inputs=[
            file_input,
            model_size_input,
            task_input,
            output_format_input,
            language_input
        ],
        outputs=output_file
    )

    # Toggle between light and dark mode.
    # Reassigning `demo.theme` after the app is built does not restyle pages
    # that are already being served, so the switch is done in the browser.
    # NOTE(review): assumes the Gradio 3.x front end applies its dark palette
    # when <body> carries the `dark` class, and that event listeners accept
    # `_js` (renamed in Gradio 4) -- confirm against the pinned version.
    theme_button = gr.Button("Toggle Theme")
    theme_button.click(
        fn=None,
        inputs=None,
        outputs=None,
        _js="() => { document.body.classList.toggle('dark'); }"
    )

# Only start the server when run as a script, so importing this module
# (e.g. for tests) does not block on a running web server.
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
openai-whisper==20230314
|
2 |
+
moviepy==1.0.3
|
3 |
+
srt==3.5.2
|
4 |
+
gradio==3.41.2
|
5 |
+
ffmpeg-python==0.2.0
|