|
import whisper |
|
import os |
|
import datetime |
|
import srt |
|
from moviepy.editor import VideoFileClip |
|
import gradio as gr |
|
import tempfile |
|
|
|
|
|
# Whisper checkpoint names offered in the UI.
model_sizes = ['tiny', 'base', 'small']

# Every listed checkpoint is loaded once, at import time, keyed by its name.
models = dict(zip(model_sizes, map(whisper.load_model, model_sizes)))

# Task names forwarded to the model's transcribe() call.
tasks = ['transcribe', 'translate']

# Output-format labels shown in the UI for each task.
output_formats = {
    'transcribe': [
        'Transcription (.txt)',
        'Subtitles (.srt)',
    ],
    'translate': [
        'Translation (.txt)',
        'Translated Subtitles (.srt)',
    ],
}

# Source-language choices; the first entry means "no explicit language".
languages = ['Auto-detect', 'en', 'zh', 'fr', 'es', 'de', 'ja', 'ko']
|
|
|
def is_video_file(file_path):
    """Return True when *file_path* ends in a known video extension.

    The comparison is case-insensitive and looks only at the final
    extension as split off by ``os.path.splitext``.
    """
    _, suffix = os.path.splitext(file_path)
    return suffix.lower() in ('.mp4', '.avi', '.mov', '.mkv')
|
|
|
def extract_audio_from_video(video_path):
    """Write the audio track of *video_path* to an MP3 file beside it.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the written MP3 file: *video_path* with its extension
        replaced by ``.mp3``.

    Raises:
        ValueError: If the video has no audio track.
    """
    # splitext only touches the file name's extension, so a dot inside a
    # directory name can never be mistaken for the extension separator.
    audio_path = os.path.splitext(video_path)[0] + '.mp3'

    video = VideoFileClip(video_path)
    try:
        if video.audio is None:
            raise ValueError(f"No audio track found in {video_path!r}")
        video.audio.write_audiofile(audio_path, codec='mp3')
    finally:
        # Always release the clip's readers, even when extraction fails;
        # otherwise the source file can stay open after this call.
        video.close()

    return audio_path
|
|
|
def _segments_to_srt(segments):
    """Render Whisper result segments as the text of an SRT file."""
    subtitles = [
        srt.Subtitle(
            index=index,
            start=datetime.timedelta(seconds=segment['start']),
            end=datetime.timedelta(seconds=segment['end']),
            content=segment['text'].strip(),
        )
        for index, segment in enumerate(segments, start=1)
    ]
    return srt.compose(subtitles)


def generate_output(file_obj, model_size, task, output_format, language):
    """Transcribe or translate an uploaded media file and save the result.

    Args:
        file_obj: The upload from the file input: either a path string or
            an object whose ``name`` attribute is the path on disk.
        model_size: Key into the module-level ``models`` dict.
        task: Task name forwarded to ``model.transcribe``.
        output_format: Label chosen in the UI. A label containing
            ``'Subtitles'`` produces an ``.srt`` file; anything else
            produces a ``.txt`` file.
        language: Language code, or ``"Auto-detect"`` to pass
            ``language=None`` to the model.

    Returns:
        Path of the generated file. It lives in a fresh temporary directory
        that is deliberately NOT removed here, because the caller reads the
        file only after this function has returned.

    Raises:
        ValueError: If no file was uploaded.
    """
    if file_obj is None:
        raise ValueError("No file was uploaded.")

    # Read the upload in place. Re-creating it under another directory via
    # os.path.join() is unsafe: an absolute upload path discards the
    # directory prefix, so opening the result for writing would truncate
    # the upload itself.
    source_path = file_obj if isinstance(file_obj, str) else file_obj.name

    model = models[model_size]

    extracted_audio = None
    try:
        if is_video_file(source_path):
            extracted_audio = extract_audio_from_video(source_path)
        audio_path = extracted_audio if extracted_audio else source_path

        result = model.transcribe(
            audio_path,
            task=task,
            language=None if language == "Auto-detect" else language,
        )
    finally:
        # The extracted track is only an intermediate artifact.
        if extracted_audio and os.path.exists(extracted_audio):
            os.remove(extracted_audio)

    segments = result['segments']
    if 'Subtitles' in output_format:
        extension = '.srt'
        content = _segments_to_srt(segments)
    else:
        extension = '.txt'
        # Segment text usually carries its own leading space; strip before
        # joining so words are separated by exactly one space.
        content = " ".join(segment['text'].strip() for segment in segments)

    # A directory created with mkdtemp() outlives this call, unlike a
    # TemporaryDirectory context, which would delete the output before the
    # caller could serve it.
    output_dir = tempfile.mkdtemp()
    base_name = os.path.splitext(os.path.basename(source_path))[0]
    output_file = os.path.join(output_dir, base_name + extension)
    with open(output_file, "w", encoding='utf-8') as file:
        file.write(content)

    return output_file
|
|
|
def update_output_format(task):
    """Return a dropdown update listing the output formats for *task*.

    The first format registered for the task becomes the selected value.

    Raises:
        KeyError: If *task* is not a key of ``output_formats``.
    """
    formats = output_formats[task]
    # gr.update() is the version-portable spelling; the per-component
    # gr.Dropdown.update() classmethod is not available in Gradio 4+.
    return gr.update(choices=formats, value=formats[0])
|
|
|
# Build the UI: upload -> options -> generate -> download.
with gr.Blocks() as demo:
    gr.Markdown("# 🎼 Video Transcription and Subtitles Generator")
    gr.Markdown("Upload a video or audio file to get the transcription or subtitles.")

    with gr.Row():
        file_input = gr.File(
            label="Upload Video or Audio File",
            file_types=['video', 'audio']
        )

    with gr.Row():
        model_size_input = gr.Dropdown(
            label="Select Whisper Model Size",
            choices=model_sizes,
            value='small'
        )
        task_input = gr.Dropdown(
            label="Select Task",
            choices=tasks,
            value='transcribe'
        )
        output_format_input = gr.Dropdown(
            label="Select Output Format",
            choices=output_formats['transcribe'],
            value=output_formats['transcribe'][0]
        )
        language_input = gr.Dropdown(
            label="Select Original Language (Optional)",
            choices=languages,
            value='Auto-detect'
        )

    # Keep the format choices in sync with the selected task.
    task_input.change(
        fn=update_output_format,
        inputs=task_input,
        outputs=output_format_input
    )

    submit_button = gr.Button("Generate")
    output_file = gr.File(label="Download Output File")

    submit_button.click(
        fn=generate_output,
        inputs=[
            file_input,
            model_size_input,
            task_input,
            output_format_input,
            language_input
        ],
        outputs=output_file
    )

    def toggle_theme():
        """Swap the Blocks theme between Default and Monochrome.

        NOTE(review): reassigning ``demo.theme`` after the page has been
        served may not restyle an already-open page -- confirm against the
        installed Gradio version.
        """
        # gr.themes has no "Monokai" theme; Monochrome is the built-in
        # alternative used here.
        if demo.theme == gr.themes.Default():
            demo.theme = gr.themes.Monochrome()
        else:
            demo.theme = gr.themes.Default()

    theme_button = gr.Button("Toggle Theme")
    # The handler updates no components, so it is wired without outputs.
    theme_button.click(fn=toggle_theme, inputs=None, outputs=None)

demo.launch()
|
|