kurianbenoy's picture
Use file type to upload audio
b8c0bc8
raw
history blame
2.12 kB
import gradio
import torchaudio
from fastai.vision.all import *
from fastai.learner import load_learner
from torchvision.utils import save_image
from huggingface_hub import hf_hub_download
# Download the exported learner from the Hugging Face Hub and load it.
# NOTE(review): load_learner deserialises the file with pickle, so the hub
# repository must remain a trusted source — confirm before changing it.
model = load_learner(
    hf_hub_download(
        repo_id="kurianbenoy/music_genre_classification_baseline",
        filename="model.pkl",
    )
)
# Directory whose files are offered as clickable examples in the UI.
EXAMPLES_PATH = Path("./examples")

# Vocabulary of the loaded model; predict() uses it to name each probability.
labels = model.dls.vocab

# Keyword arguments forwarded to gradio.Interface.
interface_options = {
    "title": "Music Genre Classification",
    "description": "A simple baseline model for classifying music genres with fast.ai on [Kaggle competition data](https://www.kaggle.com/competitions/kaggle-pog-series-s01e02/data)",
    # iterdir() yields entries in arbitrary order and also yields
    # sub-directories; sort for a stable example list and keep files only.
    "examples": sorted(
        f"{EXAMPLES_PATH}/{f.name}"
        for f in EXAMPLES_PATH.iterdir()
        if f.is_file()
    ),
    "interpretation": "default",
    "layout": "horizontal",
    "theme": "default",
}
# FFT size, also used as the window length, for the mel spectrogram.
N_FFT = 2048
# Step between successive frames: half a window (1024 samples).
HOP_LEN = N_FFT // 2
def create_spectrogram(filename):
    """Turn an audio file into a mel spectrogram scaled to the range [0, 1].

    The file is loaded with torchaudio, converted to a 224-band power mel
    spectrogram, averaged over axis 0, converted to decibels and finally
    min-max normalised.

    Args:
        filename: Path of the audio file, passed to ``torchaudio.load``.

    Returns:
        The normalised spectrogram tensor. NOTE(review): presumably 2-D
        (mel bands x frames) once axis 0 — assumed to be the channel axis —
        has been averaged away; confirm against torchaudio's output layout.
    """
    audio, sr = torchaudio.load(filename)

    # The transform depends on the file's own sample rate, so it is built
    # per call rather than once at module level.
    to_mel = torchaudio.transforms.MelSpectrogram(
        sample_rate=sr,
        n_fft=N_FFT,
        win_length=N_FFT,
        hop_length=HOP_LEN,
        center=True,
        pad_mode="reflect",
        power=2.0,
        norm="slaney",
        onesided=True,
        n_mels=224,
        mel_scale="htk",
    )
    specgram = to_mel(audio).mean(axis=0)
    specgram = torchaudio.transforms.AmplitudeToDB()(specgram)

    # Min-max normalisation: shift the minimum to 0, then scale the peak to 1.
    specgram = specgram - specgram.min()
    peak = specgram.max()
    # A constant spectrogram (e.g. digital silence) has zero range; dividing
    # would produce 0/0 = NaN everywhere, so leave it as all zeros instead.
    if peak > 0:
        specgram = specgram / peak
    return specgram
def create_image(filename, dest="temp.png"):
    """Render the spectrogram of an audio file to an image file.

    Args:
        filename: Path of the audio file to convert.
        dest: Where the image is written. Defaults to the relative path
            ``temp.png``, the location the original implementation always
            used.

    Returns:
        ``dest``, unchanged, so a caller can hand it straight to the
        prediction step.
    """
    specgram = create_spectrogram(filename)
    save_image(specgram, dest)
    return dest
def predict(img):
    """Classify a spectrogram image and report a score for every label.

    Args:
        img: Input accepted by ``PILImage.create``; the pipeline in this
            file passes the path of the saved spectrogram image.

    Returns:
        A dict mapping each entry of the module-level ``labels`` to the
        corresponding model output converted to ``float``.
    """
    image = PILImage.create(img)
    # Only the third value returned by model.predict is needed here.
    _, _, scores = model.predict(image)
    # Pair labels and scores positionally instead of indexing both by i.
    return dict(zip(labels, map(float, scores)))
def end2endpipeline(filename):
    """Run the whole demo pipeline for one audio file.

    Writes the file's spectrogram to ``temp.png`` and then classifies that
    image.

    Args:
        filename: Path of the uploaded audio file.

    Returns:
        Whatever ``predict`` returns for the generated image.
    """
    # Step 1: audio -> spectrogram image on disk.
    create_image(filename)
    # Step 2: image -> per-label scores.
    scores = predict("temp.png")
    return scores
# Options passed to demo.launch(): use the request queue, no public share link.
launch_options = dict(enable_queue=True, share=False)

# Web UI: an uploaded audio file (handed over as a file path) goes through
# end2endpipeline, and the result is shown as a label widget limited to the
# top 5 classes.
demo = gradio.Interface(
    fn=end2endpipeline,
    inputs=gradio.inputs.Audio(source="upload", type="filepath"),
    outputs=gradio.outputs.Label(num_top_classes=5),
    **interface_options,
)

# Starts the app as soon as this module is executed.
demo.launch(**launch_options)