import torch
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import gradio as gr

# Hugging Face Hub checkpoint used by the classification pipeline below.
MODEL_NAME = "JackismyShephard/whisper-medium.en-finetuned-gtzan"

# Use device index 0 when torch reports CUDA as available; otherwise the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Audio-classification pipeline, constructed once at import time.
pipe = pipeline("audio-classification", model=MODEL_NAME, device=device)

def classify_audio(filepath):
    """Classify an audio file and return a score for every predicted label.

    Args:
        filepath: Path of the audio file to classify. May be ``None`` (or
            empty) when no audio was supplied; the audio inputs in this file
            are declared ``optional=True``.

    Returns:
        A dict mapping each label returned by ``pipe`` to its score, or
        ``None`` when no audio was supplied.
    """
    # Guard against a submission without audio: forwarding None to the
    # pipeline would fail inside it instead of producing "no prediction".
    # NOTE(review): assumes the "label" output renders None as an empty
    # result -- confirm for the installed Gradio version.
    if not filepath:
        return None
    return {pred["label"]: pred["score"] for pred in pipe(filepath)}


# Top-level Blocks container that hosts the tabbed UI.
demo = gr.Blocks()

# Upload widget that hands the callback a path on disk.
# TODO: check whether the `gr.inputs.` namespace is still needed here.
# TODO: consider offering further input sources (e.g. YouTube), if possible.
_file_audio = gr.inputs.Audio(
    source="upload",
    type="filepath",
    optional=True,
    label="Audio file",
)

_file_description = (
    "Classify long-form audio or microphone inputs with the click of a button! Demo uses the"
    f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to classify audio files"
    " of arbitrary length."
)

# Interface for classifying an uploaded audio file.
file_classify = gr.Interface(
    fn=classify_audio,
    # TODO: a one-element list may be unnecessary for a single input.
    inputs=[_file_audio],
    # TODO: confirm that "label" is the right output component.
    outputs="label",
    title="Classify Genre of Music",
    description=_file_description,
    examples=[["./example.flac"]],
    cache_examples=True,
    # TODO: confirm that `layout` is still needed.
    layout="horizontal",
    theme="huggingface",
    allow_flagging="never",
)

# Microphone widget that hands the callback a path to the recording.
_mic_audio = gr.inputs.Audio(
    source="microphone",
    type="filepath",
    optional=True,
)

_mic_description = (
    "Classify long-form audio or microphone inputs with the click of a button! Demo uses the"
    f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to classify audio files"
    " of arbitrary length."
)

# Interface for classifying audio recorded from the microphone.
mic_classify = gr.Interface(
    fn=classify_audio,
    inputs=[_mic_audio],
    # TODO: confirm that "label" is the right output component.
    outputs="label",
    title="Classify Genre of Music",
    description=_mic_description,
    layout="horizontal",
    theme="huggingface",
    allow_flagging="never",
)

# Mount both interfaces as tabs inside the Blocks container.
with demo:
    gr.TabbedInterface(
        interface_list=[file_classify, mic_classify],
        tab_names=["Classify Audio File", "classify Microphone input"],
    )

# Start the app with request queueing enabled.
demo.launch(enable_queue=True)