"""Gradio demo that classifies the musical genre of an audio clip.

Loads a fine-tuned audio-classification checkpoint through the
🤗 Transformers ``pipeline`` API and exposes it via a ``gr.Interface``
that accepts uploaded files or microphone recordings.
"""

import torch
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import gradio as gr

MODEL_NAME = "JackismyShephard/whisper-medium.en-finetuned-gtzan"

# Use the first CUDA device when one is available, otherwise run on CPU.
device = 0 if torch.cuda.is_available() else "cpu"

# Built once at import time so every request reuses the same loaded model.
pipe = pipeline(
    task="audio-classification",
    model=MODEL_NAME,
    device=device,
)


def classify_audio(filepath):
    """Classify the audio file at *filepath* and return per-label scores.

    Args:
        filepath: Path to the audio file handed over by the ``gr.Audio``
            input (configured with ``type="filepath"``). It is ``None``
            when the user submits without uploading or recording anything.

    Returns:
        A dict mapping each predicted label to its score, which is the
        value passed on to the ``gr.Label`` output.

    Raises:
        gr.Error: If no audio was supplied, so the UI shows an actionable
            message instead of an opaque pipeline failure.
    """
    if filepath is None:
        raise gr.Error(
            "No audio file submitted! Please upload or record an audio file"
            " before submitting your request."
        )

    # The pipeline yields one {"label": ..., "score": ...} dict per class.
    return {pred["label"]: pred["score"] for pred in pipe(filepath)}


demo = gr.Interface(
    fn=classify_audio,
    inputs=gr.Audio(
        sources=["upload", "microphone"],
        label="Audio file",
        type="filepath",
    ),
    outputs=gr.Label(),
    title="Classify Genres of Music",
    description=(
        "Classify long-form audio or microphone inputs with the click of a button! Demo uses the"
        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to classify audio files"
        " of arbitrary length."
    ),
    examples="./examples",
    # Example predictions are computed up front rather than on each click.
    cache_examples=True,
    allow_flagging="never",
)

# Launched unconditionally at module level, as in the original script.
demo.launch()