# -*- coding: utf-8 -*-
"""app.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1SLY6vFgJGYJxXCiJWtRo3Qxag5r_Y4K7

Gradio demo with two tabs: one transcribes microphone recordings, the other
downloads the audio track of a YouTube video and transcribes it. Both tabs
share a single ``transformers`` pipeline object (``pipe``).
"""

# Notebook-only shell commands: they are not valid Python in a plain script.
# Run them in a notebook cell, or install the packages with pip beforehand.
# !pip install transformers
# !pip install pytube
# !pip install gradio

import os
import tempfile

import gradio as gr
from transformers import pipeline
from pytube import YouTube

# Pipeline used by both handlers below. It must be defined at module level,
# otherwise every request fails with a NameError on ``pipe``.
pipe = pipeline(model="jdowling/whisper-small-hi")  # change to "your-username/the-name-you-picked"


def transcribe_youtube(link):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        link: URL of the YouTube video.

    Returns:
        The ``"text"`` entry of the result returned by ``pipe``.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    stream = YouTube(link).streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError(f"No audio-only stream found for {link!r}")

    # Download into a per-request temporary directory so that concurrent
    # requests cannot overwrite each other's "audio.mp3", and so the file is
    # removed once the transcription is done.
    with tempfile.TemporaryDirectory() as tmp_dir:
        audio_path = stream.download(output_path=tmp_dir, filename="audio.mp3")
        return pipe(audio_path)["text"]


def transcribe(audio):
    """Transcribe a recorded audio file.

    Args:
        audio: Path of the recorded audio file (the Audio input below is
            configured with ``type="filepath"``), or a falsy value when
            nothing was recorded.

    Returns:
        The ``"text"`` entry of the result returned by ``pipe``, or an empty
        string when no audio was provided.
    """
    if not audio:
        # Nothing was recorded; avoid handing an empty input to the pipeline.
        return ""
    return pipe(audio)["text"]


demo = gr.Blocks()

iface = gr.Interface(
    fn=transcribe,
    # NOTE(review): newer Gradio releases (4.x) appear to spell this argument
    # ``sources=["microphone"]`` -- confirm against the installed version.
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
    title="Whisper Small Swedish-Microphone",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model. An audio for recognize.",
)

# ``yt`` is the YouTube tab; its handler returns a single string, so exactly
# one output component is declared.
yt = gr.Interface(
    fn=transcribe_youtube,
    inputs=[gr.Textbox(lines=1, label="Youtube URL")],
    outputs=["text"],
    title="Whisper Small Swedish-Youtube",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model. A Youtube URL for recognize.",
)

with demo:
    gr.TabbedInterface([iface, yt], ["Transcribe Audio", "Transcribe YouTube"])

# Enable request queuing explicitly, then start the server.
demo.queue()
demo.launch()