File size: 1,062 Bytes
f5d6a53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c17a45
f5d6a53
2c17a45
 
f5d6a53
 
 
 
 
 
 
 
 
2c17a45
 
 
f5d6a53
 
 
 
 
2c17a45
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Disabled microphone-input variant of the demo (kept commented out for reference):
# from transformers import pipeline
# import gradio as gr

# pipe = pipeline(model="kk90ujhun/whisper-small-zh")  # change to "your-username/the-name-you-picked"

# def transcribe(audio):
#     text = pipe(audio)["text"]
#     return text

# iface = gr.Interface(
#     fn=transcribe, 
#     inputs=gr.Audio(source="microphone", type="filepath"), 
#     outputs="text",
#     title="Whisper Small Chinese",
#     description="Realtime demo for Chinese speech recognition using a fine-tuned Whisper small model.",
# )

# iface.launch()

from transformers import pipeline
from pytube import YouTube
import gradio as gr

# Hub id of the Whisper checkpoint used for transcription.
_MODEL_ID = "kk90ujhun/whisper-small-zh"

# Built once at import time; transcribe() reuses this object on every call.
pipe = pipeline(model=_MODEL_ID)


def transcribe(url):
    """Transcribe the audio track of a YouTube video to text.

    Downloads the first audio-only MP4 stream of the video into a temporary
    directory, runs the module-level ``pipe`` on the downloaded file and
    returns the recognised text. The temporary download is removed once the
    pipeline call finishes, whether or not it succeeded.

    Args:
        url: Address of the YouTube video, as typed into the UI textbox.

    Returns:
        The value stored under the ``"text"`` key of the pipeline output.

    Raises:
        ValueError: If ``url`` is empty or blank, or if the video exposes no
            audio-only MP4 stream.
    """
    # Local import keeps this function self-contained (stdlib only).
    import tempfile

    # Reject blank input up front instead of handing it to pytube.
    if not url or not url.strip():
        raise ValueError("Please enter a YouTube URL.")
    url = url.strip()

    audio_streams = YouTube(url).streams.filter(file_extension="mp4", only_audio=True)
    stream = audio_streams.first()
    if stream is None:
        # first() gives None when the filter matched nothing; fail with a
        # readable message rather than an AttributeError on .download().
        raise ValueError(f"No audio-only MP4 stream found for {url!r}.")

    # Download into a throwaway directory so repeated requests do not pile up
    # media files in the working directory.
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = stream.download(output_path=workdir)
        result = pipe(audio_path, batch_size=512, truncation=True)

    return result["text"]


# Single text input where the user pastes the video address.
_url_input = gr.Textbox(label="Enter a YouTube URL:")

# Web UI: the textbox feeds transcribe(), whose result is shown as plain text.
iface = gr.Interface(
    title="Whisper Small Chinese",
    description="Transcribe Chinese videos",
    fn=transcribe,
    inputs=_url_input,
    outputs="text",
)

# Start serving the interface.
iface.launch()