Spaces:
Sleeping
Sleeping
File size: 3,292 Bytes
e767c1f 574cd0e e767c1f 574cd0e e767c1f 574cd0e be3e689 574cd0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
from transformers import pipeline
import gradio as gr
import os
import subprocess
from pytube import YouTube
# Shared transformers pipeline used by transcribe() and get_text(); it is
# created once when the module is loaded.
# To use another checkpoint, change the model id to
# "your-username/the-name-you-picked".
pipe = pipeline(model="tilos/whisper-small-zh-HK")
def video2mp3(video_file, output_ext="mp3"):
    """Convert ``video_file`` to ``output_ext`` with ffmpeg.

    The output path is the input path with its extension replaced by
    ``output_ext``; an existing file at that path is overwritten (``-y``).

    Args:
        video_file: Path of the media file to convert.
        output_ext: Extension (without the leading dot) of the file to write.

    Returns:
        Path of the converted file, or ``video_file`` itself when it already
        carries the requested extension.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    filename, ext = os.path.splitext(video_file)
    # Input and output would be the same path here, which ffmpeg cannot
    # convert in place, so the file is passed through untouched.
    if ext.lower() == f".{output_ext}".lower():
        return video_file
    output_file = f"{filename}.{output_ext}"
    # check=True surfaces a failed conversion here instead of handing back a
    # path to a file that was never written.
    subprocess.run(
        ["ffmpeg", "-y", "-i", video_file, output_file],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
        check=True,
    )
    return output_file
def transcribe(audio):
    """Transcribe an audio file with the module-level ``pipe``.

    Args:
        audio: Path of the audio file to transcribe. May be ``None`` or empty
            when the handler is triggered without any audio.

    Returns:
        The ``"text"`` entry of the pipeline result, or an empty string when
        no audio was supplied.
    """
    # The button can be clicked before anything was uploaded or recorded;
    # skip the model call rather than let the pipeline fail on missing input.
    if not audio:
        return ""
    return pipe(audio)["text"]
def get_text(url):
    """Transcribe the audio of the video at ``url``.

    The audio is fetched with ``get_audio`` and passed to the module-level
    ``pipe``.

    Args:
        url: Video URL typed into the UI. May be ``None`` or blank.

    Returns:
        The transcript with surrounding whitespace stripped, or an empty
        string when no URL was supplied.
    """
    # A blank textbox should yield an empty transcript, not a download error.
    if not url or not url.strip():
        return ""
    result = pipe(get_audio(url))
    return result["text"].strip()
def get_audio(url):
    """Download the first audio-only stream of a YouTube video.

    The file is saved in the current directory and then renamed so that it
    ends in ``.mp3``.

    Args:
        url: URL of the YouTube video.

    Returns:
        Path of the renamed audio file.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    stream = YouTube(url).streams.filter(only_audio=True).first()
    # first() yields None when the filter matches nothing; fail with a clear
    # message instead of an AttributeError on the download call.
    if stream is None:
        raise ValueError(f"No audio-only stream available for {url!r}")
    downloaded = stream.download(output_path=".")
    base, _ = os.path.splitext(downloaded)
    mp3_path = base + ".mp3"
    # Only the file name changes; the data is not re-encoded.
    # os.replace overwrites a leftover file from an earlier request on every
    # platform, whereas os.rename fails on Windows if the target exists.
    os.replace(downloaded, mp3_path)
    return mp3_path
def offline_video(video):
    """Transcribe the audio track of an uploaded video file.

    The video is converted with ``video2mp3`` and the resulting file is
    passed to ``transcribe``.

    Args:
        video: Path of the uploaded video. May be ``None`` or empty when the
            handler is triggered without an upload.

    Returns:
        The transcript text, or an empty string when no video was supplied.
    """
    # Without this guard a missing upload reaches os.path.splitext and
    # raises TypeError.
    if not video:
        return ""
    return transcribe(video2mp3(video))
# Build the demo page: one section per input source (video file, audio file,
# microphone, YouTube URL), each wired to its transcription handler.
with gr.Blocks() as demo:
    # Video file input
    gr.Interface(
        title="Cantonese Transcription using Whisper",
        description="Demo for Cantonese speech recognition using a fine-tuned Whisper small model. "
        "Generate zh-HK subtitle from video file, audio file, your microphone, and Youtube URL",
        fn=offline_video,
        inputs="video",
        outputs="text",
        allow_flagging="never",
    )

    # Audio file input
    with gr.Row():
        with gr.Column():
            input_audio = gr.Audio(source="upload", type="filepath")
            # Each button gets its own name so the two click handlers below
            # cannot be confused with one another.
            audio_btn = gr.Button('Generate Voice Subtitles')
        with gr.Column():
            output_audio = gr.Textbox(placeholder='Transcript from audio', label='Subtitles')
    audio_btn.click(transcribe, inputs=input_audio, outputs=output_audio)

    # Microphone input
    with gr.Row():
        with gr.Column():
            input_micro = gr.Audio(source="microphone", type="filepath")
            micro_btn = gr.Button('Generate Voice Subtitles')
        with gr.Column():
            output_micro = gr.Textbox(placeholder='Transcript from mic', label='Subtitles')
    micro_btn.click(transcribe, inputs=input_micro, outputs=output_micro)

    # YouTube URL input
    with gr.Row():
        with gr.Column():
            inputs_url = gr.Textbox(placeholder='Youtube URL', label='URL')
            url_btn = gr.Button('Generate Youtube Video Subtitles')
            examples = gr.Examples(examples=["https://www.youtube.com/watch?v=Yw4EoGWe0vw"], inputs=[inputs_url])
        with gr.Column():
            output_url = gr.Textbox(placeholder='Transcript from video.', label='Transcript')
    url_btn.click(get_text, inputs=inputs_url, outputs=output_url)

# Start the web server once the layout is complete.
demo.launch(debug=True)