#!/usr/bin/env python
# coding: utf-8
# In[1]:
import gradio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from pathlib import Path
from fastai.vision.all import *  # also provides load_learner, PILImage, etc.
from huggingface_hub import hf_hub_download
# In[9]:
# Download the label reference CSV and the trained model from the Hugging Face
# Hub; hf_hub_download returns a local (cached) file path.
ref_file = hf_hub_download("gputrain/UrbanSound8K-model", "UrbanSound8K.csv")
model_file = hf_hub_download("gputrain/UrbanSound8K-model", "model.pkl")
# In[10]:
df = pd.read_csv(ref_file)
# Map each audio slice to its spectrogram image name,
# e.g. 'xxxxx-x-x-x.wav' -> 'xxxxx-x-x-x.png'
df['fname'] = df['slice_file_name'].str[:-4] + '.png'
my_dict = dict(zip(df['fname'], df['class']))
def label_func(f_name):
    # Look up the class label from the image file's basename
    f_name = Path(f_name).name
    return my_dict[f_name]
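# Illustrative only: for the UrbanSound8K slice '100032-3-0-0.wav', the image
# name becomes '100032-3-0-0.png', and label_func('some/dir/100032-3-0-0.png')
# returns that row's 'class' value from the CSV (only the basename is used).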
model = load_learner(model_file)
labels = model.dls.vocab
# In[11]:
with open("article.md") as f:
    article = f.read()
# In[12]:
interface_options = {
    "title": "Urban Sound 8K Classification",
    "description": "A fastai example: ResNet34 image classification of a sound (WAV) file transformed into a Mel spectrogram.",
    # "article": article,
    "interpretation": "default",
    "layout": "horizontal",
    # Example audio clips taken from the validation data
    "examples": [
        "dog_bark.wav", "children_playing.wav", "air_conditioner.wav",
        "street_music.wav", "engine_idling.wav", "jackhammer.wav",
        "drilling.wav", "siren.wav", "car_horn.wav", "gun_shot.wav",
    ],
    "allow_flagging": "never",
}
# In[13]:
def convert_sounds_melspectrogram(audio_file):
    # Load the audio with librosa (default sr=22050, mono)
    samples, sample_rate = librosa.load(audio_file)
    # Render the Mel spectrogram as a small, borderless image so it matches
    # the format the model was trained on
    fig = plt.figure(figsize=[0.72, 0.72])
    ax = fig.add_subplot(111)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)
    melS = librosa.feature.melspectrogram(y=samples, sr=sample_rate)
    librosa.display.specshow(librosa.power_to_db(melS, ref=np.max))
    filename = 'temp.png'
    plt.savefig(filename, dpi=400, bbox_inches='tight', pad_inches=0)
    plt.close('all')
    return None
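# Sizing note: a 0.72 in x 0.72 in figure at dpi=400 yields roughly a
# 288 x 288 px image (bbox_inches='tight' trims it slightly), which is
# presumably the spectrogram size the model was trained on.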
# In[14]:
def predict():
    # Classify the saved spectrogram image and return per-class probabilities
    img = PILImage.create('temp.png')
    pred, pred_idx, probs = model.predict(img)
    return {labels[i]: float(probs[i]) for i in range(len(labels))}
# In[20]:
def end2endpipeline(filename):
    # Convert the uploaded audio to a spectrogram image, then classify it
    convert_sounds_melspectrogram(filename)
    return predict()
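# Illustrative usage (assumes one of the bundled example WAVs sits next to
# this script):
#   end2endpipeline('dog_bark.wav')
# returns a dict mapping each of the 10 class labels to its predicted probability.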
# In[16]:
demo = gradio.Interface(
    fn=end2endpipeline,
    inputs=gradio.inputs.Audio(source="upload", type="filepath"),
    outputs=gradio.outputs.Label(num_top_classes=10),
    **interface_options,
)
# In[19]:
launch_options = {
    "enable_queue": True,
    "share": False,
    "cache_examples": True,
}
demo.launch(**launch_options)
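# Run locally with `python app.py`; Gradio serves on http://127.0.0.1:7860
# by default (on Hugging Face Spaces the app is launched automatically).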
# In[ ]: