yuangongfdu committed
Commit 0ea1ca0
Parent(s): 7653446
Create app.py
app.py
ADDED
@@ -0,0 +1,36 @@
import gradio as gr
# The whisper_at package is imported under the name `whisper`, matching its use below.
import whisper_at as whisper

link = "https://github.com/YuanGongND/whisper-AT"
text = "[Github]"
paper_link = "https://arxiv.org/pdf/2307.03183.pdf"
paper_text = "[Paper]"

def predict(audio_path, time_resolution):
    # Snap the requested tagging resolution to the nearest multiple of 0.4 s.
    def round_time_resolution(time_resolution):
        multiple = float(time_resolution) / 0.4
        rounded_multiple = round(multiple)
        rounded_time_resolution = rounded_multiple * 0.4
        return rounded_time_resolution
    audio_tagging_time_resolution = round_time_resolution(time_resolution)
    model = whisper.load_model("large-v1")
    result = model.transcribe(audio_path, at_time_res=audio_tagging_time_resolution)
    # ASR results
    print(result["text"])
    # Audio tagging results
    audio_tag_result = whisper.parse_at_label(result, language='follow_asr', top_k=5, p_threshold=-1, include_class_list=list(range(527)))
    print(audio_tag_result)

    asr_output = ""
    for segment in result['segments']:
        asr_output = asr_output + str(segment['start']) + 's-' + str(segment['end']) + 's ' + segment['text'] + '\n'
    return asr_output, audio_tag_result

iface = gr.Interface(fn=predict,
                     inputs=[gr.Audio(type="filepath"), gr.Textbox(value='10', label='Time Resolution in Seconds (must be an integer multiple of 0.4, e.g., 0.4, 2, 10)')],
                     outputs=[gr.Textbox(label="ASR Output"), gr.Textbox(label="Audio Tagging Output")],
                     cache_examples=True,
                     title="Quick Demo of Whisper-AT",
                     description="We are glad to introduce Whisper-AT, a new joint audio tagging and speech recognition model that outputs background sound labels in addition to text. " + f"<a href='{paper_link}'>{paper_text}</a> " + f"<a href='{link}'>{text}</a> <br>" +
                                 "Whisper-AT is authored by Yuan Gong, Sameer Khurana, Leonid Karlinsky, and James Glass (MIT & MIT-IBM Watson AI Lab).")
iface.launch(debug=False, share=True)