Spaces:
Runtime error
Runtime error
#!/usr/bin/env python | |
# coding: utf-8 | |
# In[1]: | |
import gradio | |
from fastai.vision.all import * | |
from fastai.data.all import * | |
from pathlib import Path | |
import pandas as pd | |
from matplotlib.pyplot import specgram | |
import librosa | |
import librosa.display | |
from huggingface_hub import hf_hub_download | |
from fastai.learner import load_learner | |
# In[9]: | |
# Hugging Face Hub repository that hosts both the label metadata and the model.
HUB_REPO_ID = "gputrain/UrbanSound8K-model"

ref_file = hf_hub_download(HUB_REPO_ID, "UrbanSound8K.csv")
model_file = hf_hub_download(HUB_REPO_ID, "model.pkl")
# In[10]:
# Build the lookup from spectrogram image name to class label. The image name
# is the clip's file name with its last four characters (presumably the
# ".wav" extension -- confirm against the CSV) replaced by ".png".
df = pd.read_csv(ref_file)
df['fname'] = df['slice_file_name'].str[:-4] + '.png'
my_dict = dict(zip(df['fname'], df['class']))
def label_func(f_name):
    """Return the class label for a spectrogram image path.

    Only the final '/'-separated component of ``f_name`` is used as the key
    into the module-level ``my_dict``; a ``KeyError`` propagates when that
    name is not present.

    NOTE(review): presumably the pickled learner references this function by
    name, so it must stay defined before ``load_learner`` runs -- confirm.
    """
    base_name = str(f_name).rsplit('/', 1)[-1]
    return my_dict[base_name]
# NOTE(security): load_learner restores the learner via pickle, which can
# execute arbitrary code -- only load model files from a trusted source.
model = load_learner(model_file)
# Class vocabulary of the learner's DataLoaders; used as the keys of the
# prediction dictionary.
labels = model.dls.vocab
# In[11]: | |
# Long-form text for the interface (the "article" option is currently
# commented out in interface_options). Decode explicitly as UTF-8 so the
# result does not depend on the platform's default encoding.
with open("article.md", encoding="utf-8") as f:
    article = f.read()
# In[12]: | |
# Keyword arguments forwarded to gradio.Interface when the demo is built.
interface_options = dict(
    title="Urban Sound 8K Classification",
    description="A Fast AI example with ResNet34 image classification of a sound wav file transformed to a Mel Spectrogram ",
    #"article": article,
    interpretation="default",
    layout="horizontal",
    # Audio from validation file
    examples=[
        "dog_bark.wav",
        "children_playing.wav",
        "air_conditioner.wav",
        "street_music.wav",
        "engine_idling.wav",
        "jackhammer.wav",
        "drilling.wav",
        "siren.wav",
        "car_horn.wav",
        "gun_shot.wav",
    ],
    allow_flagging="never",
)
# In[13]: | |
def convert_sounds_melspectogram(audio_file):
    """Render an audio file as a mel-spectrogram image saved to ``temp.png``.

    The audio is loaded with librosa, turned into a mel spectrogram, converted
    to decibels relative to its maximum, and drawn on a frameless figure with
    both axes hidden.

    Args:
        audio_file: Audio source accepted by ``librosa.load`` (the Gradio
            callback passes a file path).

    Returns:
        None. The result is the ``temp.png`` file written to the current
        working directory, replacing any previous one.
    """
    samples, sample_rate = librosa.load(audio_file)
    fig = plt.figure(figsize=[0.72, 0.72])
    try:
        ax = fig.add_subplot(111)
        # Hide ticks, labels and the frame so the image holds only the
        # spectrogram itself.
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.set_frame_on(False)
        mel_spec = librosa.feature.melspectrogram(y=samples, sr=sample_rate)
        # Draw on this figure's axes explicitly instead of relying on
        # pyplot's implicit "current axes".
        librosa.display.specshow(librosa.power_to_db(mel_spec, ref=np.max), ax=ax)
        fig.savefig('temp.png', dpi=400, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when plotting or saving fails, and
        # close only this figure rather than every open one.
        plt.close(fig)
    return None
# In[14]: | |
def predict():
    """Classify the spectrogram image stored in ``temp.png``.

    Returns:
        dict mapping every entry of the module-level ``labels`` to the
        model's probability for it, converted to a plain ``float``.
    """
    img = PILImage.create('temp.png')
    # model.predict returns three values; only the per-class probabilities
    # are needed here.
    _, _, probs = model.predict(img)
    return {label: float(probs[i]) for i, label in enumerate(labels)}
# In[20]: | |
def end2endpipeline(filename):
    """Run the full audio-to-prediction pipeline for one audio file.

    First renders ``filename`` to the spectrogram image via
    ``convert_sounds_melspectogram``, then returns whatever ``predict``
    produces for that image.
    """
    convert_sounds_melspectogram(filename)
    label_probabilities = predict()
    return label_probabilities
# In[16]: | |
# Gradio UI: an uploaded audio file (handed to the callback as a file path)
# goes in, a label component showing up to 10 top classes comes out.
# NOTE(review): the gradio.inputs / gradio.outputs namespaces appear to be the
# legacy component API -- confirm they exist in the pinned Gradio version.
audio_input = gradio.inputs.Audio(source="upload", type="filepath")
label_output = gradio.outputs.Label(num_top_classes=10)
demo = gradio.Interface(
    fn=end2endpipeline,
    inputs=audio_input,
    outputs=label_output,
    **interface_options,
)
# In[19]: | |
# Keyword arguments passed to demo.launch().
# NOTE(review): whether launch() accepts "enable_queue" and "cache_examples"
# depends on the installed Gradio version -- confirm against the pinned one.
launch_options = dict(
    enable_queue=True,
    share=False,
    cache_examples=True,
)
demo.launch(**launch_options)
# In[ ]: | |