tilos committed on
Commit
574cd0e
·
verified ·
1 Parent(s): 4e2f811

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -9
app.py CHANGED
@@ -1,18 +1,94 @@
1
  from transformers import pipeline
2
  import gradio as gr
 
 
 
 
 
3
 
4
# Speech-recognition pipeline used by `transcribe`; the checkpoint is
# identified by its model id.
pipe = pipeline(
    model="Chenzhou/Whisper-zh-HK",
)
 
 
 
 
 
5
 
6
def transcribe(audio):
    """Transcribe an audio file with the module-level ``pipe``.

    Args:
        audio: Path of the audio file to transcribe, or ``None`` when the UI
            submitted the form without any recording.

    Returns:
        The recognised text, or an empty string when *audio* is ``None``.
    """
    if audio is None:
        # Gradio hands over None for an empty Audio component; the pipeline
        # would raise on it, so report "no text" instead.
        return ""
    return pipe(audio)["text"]
9
 
10
# Single-page demo: the microphone recording is saved to a file whose path is
# passed to `transcribe`, and the returned string is shown as plain text.
iface = gr.Interface(
    title="Whisper Small Cantonese",
    description="Realtime demo for Cantonese speech recognition using a fine-tuned Whisper small model.",
    fn=transcribe,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
)

# Start the web server for the demo.
iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from transformers import pipeline
2
  import gradio as gr
3
+ import os
4
+ import subprocess
5
+ from pytube import YouTube
6
+
7
# Speech-recognition pipeline shared by `transcribe` and `get_text`; the
# checkpoint is identified by its model id.
pipe = pipeline(
    model="tilos/whisper-small-zh-HK",
)
8
 
9
def video2mp3(video_file, output_ext="mp3"):
    """Convert *video_file* to an audio file with ffmpeg.

    The output is written next to the input, using the same base name and
    the extension given by *output_ext*.

    Args:
        video_file: Path of the media file to convert.
        output_ext: Extension (without the leading dot) of the file to
            produce.

    Returns:
        Path of the converted file, or *video_file* itself when it already
        has the requested extension.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    filename, _ = os.path.splitext(video_file)
    output_file = f"{filename}.{output_ext}"

    if os.path.normcase(output_file) == os.path.normcase(video_file):
        # Input and output would be the same path; ffmpeg cannot read a file
        # while overwriting it, so hand the input back untouched.
        return video_file

    completed = subprocess.run(
        ["ffmpeg", "-y", "-i", video_file, output_file],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
        check=False,
    )
    if completed.returncode != 0:
        # Fail here with a clear message instead of returning a path to a
        # file that was never written.
        raise RuntimeError(
            f"ffmpeg failed to convert {video_file!r} "
            f"(exit status {completed.returncode})"
        )
    return output_file
15
 
16
def transcribe(audio):
    """Transcribe an audio file with the module-level ``pipe``.

    Args:
        audio: Path of the audio file to transcribe, or ``None`` when the UI
            submitted the form without any upload or recording.

    Returns:
        The recognised text, or an empty string when *audio* is ``None``.
    """
    if audio is None:
        # Gradio hands over None for an empty Audio component; the pipeline
        # would raise on it, so report "no text" instead.
        return ""
    return pipe(audio)["text"]
19
 
 
 
 
 
 
 
 
20
 
21
def get_text(url):
    """Transcribe the audio track of the YouTube video at *url*.

    Args:
        url: Address of the video; ``None`` or a blank string means the
            form was submitted empty.

    Returns:
        The transcript with surrounding whitespace removed, or an empty
        string when no URL was given.
    """
    if not url or not url.strip():
        # Nothing to download; skip the network round trip and the
        # exception it would end in.
        return ""

    audio_path = get_audio(url)
    try:
        result = pipe(audio_path)
    finally:
        # The download is only needed for this one transcription; remove it
        # so repeated requests do not pile up files in the working directory.
        if os.path.exists(audio_path):
            os.remove(audio_path)
    return result['text'].strip()
24
+
25
def get_audio(url):
    """Download the first audio-only stream of a YouTube video.

    The file is saved in the current working directory and then renamed so
    that it ends in ``.mp3``.

    Args:
        url: Address of the YouTube video.

    Returns:
        Path of the renamed file.

    Raises:
        ValueError: If the video offers no audio-only stream.
    """
    stream = YouTube(url).streams.filter(only_audio=True).first()
    if stream is None:
        # first() yields None for an empty query; fail with a clear message
        # instead of an AttributeError on the next line.
        raise ValueError(f"No audio-only stream available for {url!r}")

    downloaded = stream.download(output_path=".")
    base, _ = os.path.splitext(downloaded)
    mp3_path = base + '.mp3'
    # Only the extension changes here: the file content is whatever pytube
    # delivered, no transcoding takes place.
    # os.replace overwrites a leftover file from an earlier download of the
    # same video, where os.rename would raise on Windows.
    os.replace(downloaded, mp3_path)
    return mp3_path
34
+
35
def offline_video(video):
    """Transcribe the speech of an uploaded video file.

    The audio is first extracted with ``video2mp3`` and the resulting file is
    passed to ``transcribe``.

    Args:
        video: Path of the uploaded video, or ``None`` when the form was
            submitted without a file.

    Returns:
        The recognised text, or an empty string when *video* is ``None``.
    """
    if video is None:
        # No upload: os.path.splitext(None) inside video2mp3 would raise a
        # TypeError, so answer with "no text" instead.
        return ""
    return transcribe(video2mp3(video))
39
+
40
+
41
# Page layout: one section per input source (video file, audio file,
# microphone, YouTube URL), each wired to its own handler.
with gr.Blocks() as demo:

    # Video file input
    gr.Interface(
        title="Whisper: Real Time Cantonese Recognition",
        description="Realtime demo for Cantonese speech recognition using a fine-tuned Whisper small model. "
                    "Generate zh-HK subtitle from video file, audio file, your microphone, and Youtube URL",
        fn=offline_video,
        inputs="video",
        outputs="text",
        allow_flagging="never",
    )

    # Audio file input
    with gr.Row():
        with gr.Column():
            input_audio = gr.Audio(source="upload", type="filepath")
            # Own name for this button so it is not confused with (or
            # rebound by) the microphone button further down.
            audio_btn = gr.Button('Generate Voice Subtitles')
        with gr.Column():
            output_audio = gr.Textbox(placeholder='Transcript from audio', label='Subtitles')
    audio_btn.click(transcribe, inputs=input_audio, outputs=output_audio)

    # Microphone input
    with gr.Row():
        with gr.Column():
            input_micro = gr.Audio(source="microphone", type="filepath")
            micro_btn = gr.Button('Generate Voice Subtitles')
        with gr.Column():
            output_micro = gr.Textbox(placeholder='Transcript from mic', label='Subtitles')
    micro_btn.click(transcribe, inputs=input_micro, outputs=output_micro)

    # Youtube URL input
    with gr.Row():
        with gr.Column():
            inputs_url = gr.Textbox(placeholder='Youtube URL', label='URL')
            url_btn = gr.Button('Generate Youtube Video Subtitles')
            examples = gr.Examples(
                examples=["https://www.youtube.com/watch?v=Yw4EoGWe0vw"],
                inputs=[inputs_url],
            )
        with gr.Column():
            output_url = gr.Textbox(placeholder='Transcript from video.', label='Transcript')
    url_btn.click(get_text, inputs=inputs_url, outputs=output_url)


demo.launch(debug=True)