yuangongfdu committed on
Commit
0ea1ca0
1 Parent(s): 7653446

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -0
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import whisper_at
3
+
4
# Project links and their anchor texts, interpolated into the demo description.
link, text = "https://github.com/YuanGongND/whisper-AT", "[Github]"
paper_link, paper_text = "https://arxiv.org/pdf/2307.03183.pdf", "[Paper]"
8
+
9
# Loaded models keyed by checkpoint name, so the weights are read once per
# process instead of once per request.
_MODEL_CACHE = {}


def _get_model(name="large-v1"):
    """Return the Whisper-AT model called *name*, loading it on first use."""
    if name not in _MODEL_CACHE:
        _MODEL_CACHE[name] = whisper_at.load_model(name)
    return _MODEL_CACHE[name]


def _round_time_resolution(time_resolution):
    """Snap *time_resolution* (number or numeric string) to a multiple of 0.4 s.

    Raises:
        ValueError: if the value is not a positive, finite number.
    """
    try:
        seconds = float(time_resolution)
    except (TypeError, ValueError) as err:
        raise ValueError(
            f"Time resolution must be a number of seconds, got {time_resolution!r}"
        ) from err
    # The chained comparison also rejects NaN, because every comparison
    # against NaN is false.
    if not 0 < seconds < float("inf"):
        raise ValueError(
            f"Time resolution must be a positive, finite number, got {time_resolution!r}"
        )
    # Never round down to zero steps: 0.4 s is the smallest usable resolution.
    steps = max(1, round(seconds / 0.4))
    return steps * 0.4


def predict(audio_path, time_resolution):
    """Transcribe an audio file and tag its background sounds with Whisper-AT.

    Args:
        audio_path: Path of the audio file to analyse.
        time_resolution: Requested audio-tagging resolution in seconds, as a
            number or a numeric string (the UI passes the raw textbox value).
            It is rounded to the nearest positive multiple of 0.4.

    Returns:
        A ``(asr_output, audio_tag_result)`` tuple: ``asr_output`` holds one
        ``"<start>s-<end>s<text>"`` line per transcribed segment, and
        ``audio_tag_result`` is whatever ``whisper_at.parse_at_label`` returns.

    Raises:
        ValueError: if no audio file was supplied or ``time_resolution`` is not
            a positive, finite number.
    """
    if not audio_path:
        raise ValueError("No audio file was provided.")

    # Validate the cheap input first so a bad value fails before the model loads.
    audio_tagging_time_resolution = _round_time_resolution(time_resolution)

    model = _get_model("large-v1")
    result = model.transcribe(audio_path, at_time_res=audio_tagging_time_resolution)
    # ASR Results
    print(result["text"])
    # Audio Tagging Results
    audio_tag_result = whisper_at.parse_at_label(
        result,
        language='follow_asr',
        top_k=5,
        p_threshold=-1,
        include_class_list=list(range(527)),
    )
    print(audio_tag_result)

    # The timestamps are numbers, so they are formatted rather than
    # concatenated to the surrounding strings.
    asr_output = "".join(
        f"{segment['start']}s-{segment['end']}s{segment['text']}\n"
        for segment in result['segments']
    )
    return asr_output, audio_tag_result
28
+
29
# Gradio UI: one audio upload plus a free-text time resolution go in; the
# transcript and the audio-tagging result come out as two text boxes.
iface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Audio(type="filepath"),
        gr.Textbox(
            value='10',
            label='Time Resolution in Seconds (Must be an integer multiple of 0.4, e.g., 0.4, 2, 10)',
        ),
    ],
    outputs=[
        gr.Textbox(label="ASR Output"),
        gr.Textbox(label="Audio Tagging Output"),
    ],
    cache_examples=True,
    title="Quick Demo of Whisper-AT",
    # The description is rendered as HTML; the links follow the intro sentence
    # and the author line starts after the <br>.
    description=(
        "We are glad to introduce Whisper-AT - A new joint audio tagging and speech recognition model. "
        "It outputs background sound labels in addition to text. "
        f"<a href='{paper_link}'>{paper_text}</a> "
        f"<a href='{link}'>{text}</a> <br>"
        "Whisper-AT is authored by Yuan Gong, Sameer Khurana, Leonid Karlinsky, and James Glass (MIT & MIT-IBM Watson AI Lab)."
    ),
)
iface.launch(debug=False, share=True)