#!/usr/bin/env python
# coding: utf-8

# In[1]:

import gradio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from pathlib import Path
from fastai.vision.all import *
from fastai.learner import load_learner
from huggingface_hub import hf_hub_download

# In[9]:

ref_file = hf_hub_download("gputrain/UrbanSound8K-model", "UrbanSound8K.csv")
model_file = hf_hub_download("gputrain/UrbanSound8K-model", "model.pkl")
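# hf_hub_download caches each file locally and returns its filesystem path,
# so restarts of the Space reuse the cached copies.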

# In[10]:

df = pd.read_csv(ref_file)
# Map each .wav slice to the .png spectrogram filename the model was trained on.
df['fname'] = df['slice_file_name'].apply(lambda name: str(name)[:-4] + '.png')
my_dict = dict(zip(df.fname, df['class']))

def label_func(f_name):
    # load_learner needs this function in scope to unpickle the exported model.
    return my_dict[Path(f_name).name]
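# Sanity check (hypothetical slice name following the UrbanSound8K
# [fsID]-[classID]-[occurrenceID]-[sliceID] pattern; class ID 3 is dog_bark):
# label_func("100032-3-0-0.png")  # -> "dog_bark"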

model = load_learner(model_file)
EXAMPLES_PATH = Path("./examples")
labels = model.dls.vocab  # the 10 UrbanSound8K class names

# In[11]:

with open("article.md") as f:
    article = f.read()

# In[12]:

interface_options = {
    "title": "Urban Sound 8K Classification",
    "description": "fastai example of using a pre-trained ResNet34 vision model for an audio classification task on the [Urban Sounds](https://urbansounddataset.weebly.com/urbansound8k.html) dataset.",
    "article": article,
    "interpretation": "default",
    "layout": "horizontal",
    # Audio clips drawn from the validation set
    "examples": ["dog_bark.wav", "children_playing.wav", "air_conditioner.wav",
                 "street_music.wav", "engine_idling.wav", "jackhammer.wav",
                 "drilling.wav", "siren.wav", "car_horn.wav", "gun_shot.wav"],
    "allow_flagging": "never",
}

# In[13]:

def convert_sounds_melspectrogram(audio_file):
    samples, sample_rate = librosa.load(audio_file)  # decode the clip with librosa
    # Render a small, axis-free mel spectrogram image, in the same style as the
    # spectrogram images the model was trained on.
    fig = plt.figure(figsize=[0.72, 0.72])
    ax = fig.add_subplot(111)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)
    melS = librosa.feature.melspectrogram(y=samples, sr=sample_rate)
    librosa.display.specshow(librosa.power_to_db(melS, ref=np.max))
    filename = 'temp.png'
    plt.savefig(filename, dpi=400, bbox_inches='tight', pad_inches=0)
    plt.close('all')
    return None
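# A minimal direct-use sketch, assuming one of the example clips above sits in
# the working directory:
# convert_sounds_melspectrogram("dog_bark.wav")  # writes temp.png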

# In[14]:

def predict():
    img = PILImage.create('temp.png')
    pred, pred_idx, probs = model.predict(img)
    return {labels[i]: float(probs[i]) for i in range(len(labels))}
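# predict() returns a {label: probability} dict, which the Label output renders
# as a ranked list, e.g. {"dog_bark": 0.97, "siren": 0.01, ...} (values illustrative).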

# In[20]:

def end2endpipeline(filename):
    convert_sounds_melspectrogram(filename)
    return predict()
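# Full pipeline: audio file -> temp.png mel spectrogram -> class probabilities,
# e.g. end2endpipeline("dog_bark.wav")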

# In[16]:

demo = gradio.Interface(
    fn=end2endpipeline,
    inputs=gradio.inputs.Audio(source="upload", type="filepath"),
    outputs=gradio.outputs.Label(num_top_classes=10),
    **interface_options,
)
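# Note: gradio.inputs.Audio / gradio.outputs.Label follow the Gradio 2.x-era
# API this Space targets; on Gradio 3+ the (untested here) equivalents are
# gradio.Audio(type="filepath") and gradio.Label(num_top_classes=10).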

# In[19]:

launch_options = {
    "enable_queue": True,
    "share": False,
    # "cache_examples": True,
}
demo.launch(**launch_options)
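# On newer Gradio releases queuing moved to demo.queue() and enable_queue was
# dropped from launch(); the options above match the older API used here.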

# In[ ]: