Maxkillor committed on
Commit
70a6679
1 Parent(s): 8394486

Upload 2 files

Files changed (2)
  1. app.py +153 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,153 @@
+ import whisper
+ import os
+ import datetime
+ import srt
+ from moviepy.editor import VideoFileClip
+ import gradio as gr
+ import tempfile
+
+ # Load the Whisper models once at startup
+ model_sizes = ['tiny', 'base', 'small']
+ models = {size: whisper.load_model(size) for size in model_sizes}
+
+ # Task options
+ tasks = ['transcribe', 'translate']
+
+ # Output format options
+ output_formats = {
+     'transcribe': ['Transcription (.txt)', 'Subtitles (.srt)'],
+     'translate': ['Translation (.txt)', 'Translated Subtitles (.srt)']
+ }
+
+ # Language options
+ languages = ['Auto-detect', 'en', 'zh', 'fr', 'es', 'de', 'ja', 'ko']
+
+ def is_video_file(file_path):
+     video_extensions = ['.mp4', '.avi', '.mov', '.mkv']
+     ext = os.path.splitext(file_path)[-1].lower()
+     return ext in video_extensions
+
+ def extract_audio_from_video(video_path):
+     # Write the extracted audio next to the video file
+     audio_path = video_path.rsplit('.', 1)[0] + '.mp3'
+     video = VideoFileClip(video_path)
+     # ffmpeg's MP3 encoder is 'libmp3lame' ('mp3' is not an encoder name)
+     video.audio.write_audiofile(audio_path, codec='libmp3lame')
+     video.close()
+     return audio_path
+
+ def generate_output(file_obj, model_size, task, output_format, language):
+     # gr.File passes a tempfile object; its .name attribute is the path of the
+     # uploaded file that Gradio has already saved to disk
+     file_name = file_obj.name
+
+     # If it's a video file, extract the audio
+     if is_video_file(file_name):
+         audio_path = extract_audio_from_video(file_name)
+     else:
+         audio_path = file_name
+
+     # Select the pre-loaded model
+     model = models[model_size]
+
+     # Transcribe or translate the audio
+     result = model.transcribe(
+         audio_path,
+         task=task,
+         language=None if language == "Auto-detect" else language
+     )
+
+     # Write the output into a directory that is not auto-deleted, so Gradio
+     # can still serve the file for download after this function returns
+     output_dir = tempfile.mkdtemp()
+     base_filename = os.path.join(output_dir, os.path.splitext(os.path.basename(file_name))[0])
+
+     # Prepare the output file
+     if 'Subtitles' in output_format:
+         # Generate SRT content
+         subtitles = []
+         for segment in result['segments']:
+             start = datetime.timedelta(seconds=segment['start'])
+             end = datetime.timedelta(seconds=segment['end'])
+             text = segment['text']
+
+             subtitle = srt.Subtitle(index=len(subtitles) + 1, start=start, end=end, content=text)
+             subtitles.append(subtitle)
+
+         srt_content = srt.compose(subtitles)
+         output_file = base_filename + '.srt'
+         with open(output_file, "w", encoding='utf-8') as file:
+             file.write(srt_content)
+     else:
+         # Generate TXT content
+         transcription_text = " ".join([segment['text'] for segment in result['segments']])
+         output_file = base_filename + '.txt'
+         with open(output_file, "w", encoding='utf-8') as file:
+             file.write(transcription_text)
+
+     return output_file
+
+ def update_output_format(task):
+     return gr.Dropdown.update(choices=output_formats[task], value=output_formats[task][0])
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# 📼 Video Transcription and Subtitles Generator")
+     gr.Markdown("Upload a video or audio file to get the transcription or subtitles.")
+
+     with gr.Row():
+         file_input = gr.File(
+             label="Upload Video or Audio File",
+             file_types=['video', 'audio']
+         )
+
+     with gr.Row():
+         model_size_input = gr.Dropdown(
+             label="Select Whisper Model Size",
+             choices=model_sizes,
+             value='small'
+         )
+         task_input = gr.Dropdown(
+             label="Select Task",
+             choices=tasks,
+             value='transcribe'
+         )
+         output_format_input = gr.Dropdown(
+             label="Select Output Format",
+             choices=output_formats['transcribe'],
+             value=output_formats['transcribe'][0]
+         )
+         language_input = gr.Dropdown(
+             label="Select Original Language (Optional)",
+             choices=languages,
+             value='Auto-detect'
+         )
+
+     # Keep the output-format choices in sync with the selected task
+     task_input.change(
+         fn=update_output_format,
+         inputs=task_input,
+         outputs=output_format_input
+     )
+
+     submit_button = gr.Button("Generate")
+     output_file = gr.File(label="Download Output File")
+
+     submit_button.click(
+         fn=generate_output,
+         inputs=[
+             file_input,
+             model_size_input,
+             task_input,
+             output_format_input,
+             language_input
+         ],
+         outputs=output_file
+     )
+
+     # Toggle between light and dark mode. Gradio themes cannot be swapped after
+     # the UI is built, so toggle the built-in dark-mode CSS class on the client
+     # via the _js hook instead of a Python callback.
+     theme_button = gr.Button("Toggle Theme")
+     theme_button.click(
+         fn=None,
+         inputs=None,
+         outputs=None,
+         _js="() => { document.body.classList.toggle('dark'); }"
+     )
+
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ openai-whisper==20230314
+ moviepy==1.0.3
+ srt==3.5.2
+ gradio==3.41.2
+ ffmpeg-python==0.2.0