"""Gradio demo: transcribe the audio track of a YouTube video.

The app takes a YouTube URL, downloads an audio-only mp4 stream with
pytube, and runs it through a fine-tuned Whisper small checkpoint via the
Hugging Face ``pipeline`` helper.
"""

import tempfile

import gradio as gr
from pytube import YouTube
from transformers import pipeline

# Checkpoint used for transcription; change to
# "your-username/the-name-you-picked" to use a different model.
MODEL_ID = "kk90ujhun/whisper-small-zh"

# Inference arguments forwarded verbatim to the pipeline call.
# NOTE(review): kept exactly as in the original script. Whether the
# speech-recognition pipeline honours ``truncation`` and how ``batch_size``
# interacts with un-chunked input depends on the installed transformers
# version -- confirm against the pipeline documentation.
PIPELINE_CALL_KWARGS = {"batch_size": 512, "truncation": True}

# Built once at import time so every request reuses the same pipeline.
pipe = pipeline(model=MODEL_ID)


def transcribe(url: str) -> str:
    """Return the transcription of the audio track of a YouTube video.

    Args:
        url: Address of the YouTube video to transcribe.

    Returns:
        The value stored under the ``"text"`` key of the pipeline output.

    Raises:
        ValueError: If ``url`` is empty or contains only whitespace.
        RuntimeError: If the video exposes no audio-only mp4 stream.
    """
    if not url or not url.strip():
        raise ValueError("Please enter a YouTube URL.")

    stream = (
        YouTube(url.strip())
        .streams.filter(file_extension="mp4", only_audio=True)
        .first()
    )
    if stream is None:
        # ``first()`` yields None when the filter matches nothing; fail with
        # a readable message instead of an AttributeError on ``.download``.
        raise RuntimeError(f"No audio-only mp4 stream found for {url!r}.")

    # Download into a throwaway directory so repeated requests do not pile
    # up media files in the working directory. The pipeline call stays inside
    # the ``with`` block because the file is deleted when the block exits.
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = stream.download(output_path=workdir)
        result = pipe(audio_path, **PIPELINE_CALL_KWARGS)

    return result["text"]


iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Textbox(label="Enter a YouTube URL:"),
    outputs="text",
    title="Whisper Small Chinese",
    description="Transcribe Chinese videos",
)

# Launched at module level (as in the original) so running this file as the
# app entry point starts the server.
iface.launch()