File size: 2,165 Bytes
8c72189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4c6f70e
8c72189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import torch
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
import gradio as gr

# Hugging Face Hub checkpoint used by the demo (also linked in the UI description).
MODEL_NAME = "JackismyShephard/whisper-medium.en-finetuned-gtzan"

# Use CUDA device 0 when a GPU is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Audio-classification pipeline, built once at import time and shared by all requests.
pipe = pipeline("audio-classification", model=MODEL_NAME, device=device)

def classify_audio(filepath):
    """Classify an audio input with the module-level ``pipe``.

    Args:
        filepath: Forwarded unchanged to ``pipe``; the Gradio inputs in this
            file are configured with ``type="filepath"``.

    Returns:
        A dict mapping each predicted label to its score.
    """
    return {pred["label"]: pred["score"] for pred in pipe(filepath)}



# Top-level Blocks container; the tabbed interfaces are attached to it further down.
demo = gr.Blocks()

# Interface for classifying an uploaded audio file.
file_transcribe = gr.Interface(
    # Previously `fn=transcribe`, but no `transcribe` is defined or imported in
    # this module, so building the interface raised NameError at import time.
    # `classify_audio` is the handler defined above.
    fn=classify_audio,
    # TODO: confirm whether a list is required for a single input.
    inputs=[
        # TODO: confirm whether the `.inputs.` namespace is still required.
        gr.inputs.Audio(source="upload", optional=True, label="Audio file", type="filepath"),
        # TODO: add a YouTube input source here, if possible.
    ],
    # classify_audio returns a {label: score} dict, which the "label" output renders.
    outputs="label",
    layout="horizontal",  # TODO: confirm whether this is needed.
    theme="huggingface",
    title="Classify Genre of Music",
    description=(
        "Classify long-form audio or microphone inputs with the click of a button! Demo uses the"
        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to classify audio files"
        " of arbitrary length."
    ),
    examples=[
        ["./example.flac"],
    ],
    cache_examples=True,
    allow_flagging="never",
)

# Interface for classifying audio recorded from the microphone.
mic_transcribe = gr.Interface(
    # Previously `fn=transcribe`, but no `transcribe` is defined or imported in
    # this module, so building the interface raised NameError at import time.
    # `classify_audio` is the handler defined above.
    fn=classify_audio,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
    ],
    # classify_audio returns a {label: score} dict, which the "label" output renders.
    outputs="label",
    layout="horizontal",
    theme="huggingface",
    title="Classify Genre of Music",
    description=(
        "Classify long-form audio or microphone inputs with the click of a button! Demo uses the"
        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to classify audio files"
        " of arbitrary length."
    ),
    allow_flagging="never",
)

# Mount the two interfaces as tabs inside the Blocks container.
with demo:
    gr.TabbedInterface(
        interface_list=[file_transcribe, mic_transcribe],
        tab_names=["Classify Audio File", "classify Microphone input"],
    )

# Start the app with request queueing enabled.
demo.launch(enable_queue=True)