# from transformers import pipeline
# import gradio as gr

# pipe = pipeline(model="kk90ujhun/whisper-small-zh")  # change to "your-username/the-name-you-picked"

# def transcribe(audio):
#     text = pipe(audio)["text"]
#     return text

# iface = gr.Interface(
#     fn=transcribe, 
#     inputs=gr.Audio(source="microphone", type="filepath"), 
#     outputs="text",
#     title="Whisper Small Chinese",
#     description="Realtime demo for Chinese speech recognition using a fine-tuned Whisper small model.",
# )

# iface.launch()

import tempfile

import gradio as gr
from pytube import YouTube
from transformers import pipeline

# Hub id of the fine-tuned Whisper checkpoint used for transcription.
MODEL_ID = "kk90ujhun/whisper-small-zh"

# Built once at import time so every request reuses the same loaded model.
pipe = pipeline(model=MODEL_ID)


def transcribe(url):
	"""Download the audio track of a YouTube video and transcribe it.

	Args:
		url: Address of the YouTube video, as typed into the text box.

	Returns:
		The text produced by the module-level speech-recognition ``pipe``.

	Raises:
		ValueError: If ``url`` is empty or contains only whitespace.
		RuntimeError: If the video offers no audio-only MP4 stream.
	"""
	# Reject blank input up front instead of letting the downloader fail
	# with an unrelated parsing error.
	if not url or not url.strip():
		raise ValueError("Please enter a YouTube URL.")

	stream = (
		YouTube(url.strip())
		.streams.filter(file_extension='mp4', only_audio=True)
		.first()
	)
	# first() yields None when the filter matches nothing; without this
	# check the failure would surface as an AttributeError on .download().
	if stream is None:
		raise RuntimeError("No audio-only MP4 stream is available for this video.")

	# Download into a throwaway directory so the file is removed once the
	# transcription is done rather than piling up in the working directory.
	with tempfile.TemporaryDirectory() as workdir:
		audio = stream.download(output_path=workdir)

		# NOTE(review): `truncation=True` does not appear among the documented
		# speech-recognition pipeline arguments, and long recordings may need
		# `chunk_length_s` to be transcribed in full -- confirm against the
		# installed transformers version before changing this call.
		text = pipe(audio, batch_size=512, truncation=True)["text"]

	return text


# Single text field in which the user pastes the video address.
url_box = gr.Textbox(label="Enter a YouTube URL:")

# Web UI: one URL in, the transcription out as plain text.
iface = gr.Interface(
	fn=transcribe,
	inputs=url_box,
	outputs="text",
	title="Whisper Small Chinese",
	description="Transcribe Chinese videos",
)

# Start serving the demo.
iface.launch()