from transformers import pipeline import gradio as gr import os import subprocess from pytube import YouTube import subprocess pipe = pipeline(model="tilos/whisper-small-zh-HK") # change to "your-username/the-name-you-picked" def video2mp3(video_file, output_ext="mp3"): filename, ext = os.path.splitext(video_file) subprocess.call(["ffmpeg", "-y", "-i", video_file, f"{filename}.{output_ext}"], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT) return f"{filename}.{output_ext}" def transcribe(audio): text = pipe(audio)["text"] return text def gen_sub(video): result = subprocess.check_output(['auto_subtitle','./example.mp4','--model', 'medium']) return result def get_text(url): result = pipe(get_audio(url)) return result['text'].strip() def get_audio(url): website = YouTube(url) video = website.streams.filter(only_audio=True).first() out_file = video.download(output_path=".") base, ext = os.path.splitext(out_file) new_file = base + '.mp3' os.rename(out_file, new_file) audio = new_file return audio def offline_video(video): audio_file = video2mp3(video) text = transcribe(audio_file) return text with gr.Blocks() as demo: # video file input gr.Interface( title="Whisper: Real Time Cantonese Recognition", description="Realtime demo for Cantonese speech recognition using a fine-tuned Whisper small model. " "Generate zh-HK subtitle from video file, audio file, your microphone, and Youtube URL", fn=offline_video, inputs="video", outputs="text", allow_flagging="never", ) # audio file input, generate subtitled video gr.Interface( description= "Generate Cantonese subtitled video from video file", fn=gen_sub, inputs="video", outputs="video", allow_flagging="never", ) # audio file input with gr.Row(): with gr.Column(): input_audio = gr.Audio(source="upload", type="filepath") micro_btn = gr.Button('Generate Voice Subtitles') with gr.Column(): output_audio = gr.Textbox(placeholder='Transcript from audio', label='Subtitles') micro_btn.click(transcribe, inputs=input_audio, outputs=output_audio) """ gr.Interface( fn=transcribe, title="Whisper: zh-HK Subtitle Generator", description="Generate zh-HK subtitle from audio file, your microphone and Youtube", inputs = gr.Audio(source="upload", type="filepath", optional=True), outputs = "text", allow_flagging= "never", ) """ # microphone input with gr.Row(): with gr.Column(): input_mircro = gr.Audio(source="microphone", type="filepath") micro_btn = gr.Button('Generate Voice Subtitles') with gr.Column(): output_micro = gr.Textbox(placeholder='Transcript from mic', label='Subtitles') micro_btn.click(transcribe, inputs=input_mircro, outputs=output_micro) # Youtube url input with gr.Row(): with gr.Column(): inputs_url = gr.Textbox(placeholder='Youtube URL', label='URL') url_btn = gr.Button('Generate Youtube Video Subtitles') examples = gr.Examples(examples=["https://www.youtube.com/watch?v=Yw4EoGWe0vw"],inputs=[inputs_url]) with gr.Column(): output_url = gr.Textbox(placeholder='Transcript from video.', label='Transcript') url_btn.click(get_text, inputs=inputs_url, outputs=output_url ) demo.launch(debug=True)