Spaces:

alexander-lazarin
/

spanish_vowels

Sleeping

App Files Files Community

spanish_vowels / app.py

alexander-lazarin

Read gr.Audio input via filepath

e24d3c1 over 1 year ago

raw

history blame contribute delete

2.13 kB

	from fastai.vision.all import *
	import gradio as gr
	import librosa
	import matplotlib.pyplot as plt
	import numpy as np
	from PIL import Image
	from matplotlib.backends.backend_agg import FigureCanvasAgg
	from fastai.vision.all import *
	from fastcore.all import *
	import os

	# Load the exported learner once at import time so every request reuses it.
	# NOTE(review): a .pkl export is presumably unpickled on load — only ship a
	# trusted file next to this script.
	learn = load_learner("spanish_vowels.pkl")

	# Labels paired positionally with the probabilities from learn.predict.
	# NOTE(review): assumes this order matches the learner's vocab — TODO confirm.
	categories = tuple("aeiou")

	def classify_image(img):
	    """Classify an image with the module-level learner.

	    Returns a dict mapping each entry of ``categories`` to the matching
	    probability from ``learn.predict``, converted to a plain float.
	    """
	    _label, _index, scores = learn.predict(img)
	    return {name: float(score) for name, score in zip(categories, scores)}

	def classify_voice(voice):
	    """Classify a recorded vowel from an audio file.

	    Args:
	        voice: Path to the audio file to classify. Gradio passes ``None``
	            when the user submits without recording or uploading anything.

	    Returns:
	        A ``(probabilities, image)`` tuple: a dict mapping each entry of
	        ``categories`` to a float probability, and the spectrogram image
	        that was fed to the learner.

	    Raises:
	        gr.Error: If no audio was provided, so the UI shows a readable
	            message instead of an opaque failure from the audio loader.
	    """
	    if not voice:
	        raise gr.Error("Please record or upload a vowel sound first.")

	    img = voice_to_image(voice)
	    # Only the probability vector is needed; the decoded label and index
	    # returned by predict are ignored.
	    _, _, probs = learn.predict(img)
	    return dict(zip(categories, map(float, probs))), img

	def get_voice(voice):
	    """Stash the latest audio input in the module-level ``voice_rec``.

	    Nothing is returned; the value is only stored for later inspection.
	    """
	    global voice_rec
	    voice_rec = voice

	# Input widget, configured so the handler receives the audio as a file path.
	voice = gr.Audio(type="filepath")

	# Output widgets, in the same order classify_voice returns its values:
	# the per-vowel probabilities first, then the spectrogram image.
	label = [
	    gr.Label(),
	    gr.Image(),
	]

	def voice_to_image(voice):
	    """Render an audio file as a grayscale spectrogram image.

	    Args:
	        voice: Path of the audio file to plot (passed to ``librosa.load``).

	    Returns:
	        A ``PIL.Image.Image`` in RGB mode containing the rendered figure.
	    """
	    audio_data, _sample_rate = librosa.load(voice)

	    # Short-time Fourier transform magnitudes, converted to decibels.
	    spectrogram = librosa.stft(audio_data)
	    spectrogram_db = librosa.amplitude_to_db(abs(spectrogram))

	    # The figure uses matplotlib's default size and DPI.
	    # NOTE(review): presumably this must match how the training images were
	    # rendered — confirm before changing the figure geometry.
	    fig = plt.figure(frameon=False)
	    try:
	        # Axes span the whole figure so the mesh fills the entire image.
	        ax = fig.add_axes([0, 0, 1, 1])
	        ax.pcolormesh(spectrogram_db, cmap="gray")
	        ax.set_axisbelow(True)
	        ax.set_xlabel("Time")
	        ax.set_ylabel("Frequency")
	        ax.set_title("Spectrogram")

	        # Remove the extra whitespace around the plot.
	        fig.tight_layout(pad=0)

	        # Rasterise with the Agg canvas, independent of the pyplot backend.
	        canvas = FigureCanvasAgg(fig)
	        canvas.draw()

	        # buffer_rgba() replaces tostring_rgb(), which newer matplotlib
	        # releases deprecated and then removed. The array also carries the
	        # canvas's real pixel dimensions, so the size no longer has to be
	        # recomputed from inches * DPI with a truncating int().
	        rgba = np.asarray(canvas.buffer_rgba())

	        # convert() drops the alpha channel and copies the pixels, so the
	        # image stays valid after the figure is closed.
	        return Image.fromarray(rgba).convert("RGB")
	    finally:
	        # Always release the figure, even if loading or drawing failed;
	        # pyplot otherwise keeps it alive for the life of the process.
	        plt.close(fig)

	# Wire the audio input to the voice classifier. classify_image and get_voice
	# are alternative handlers that were tried here during development.
	intf = gr.Interface(
	    fn=classify_voice,
	    inputs=voice,
	    outputs=label,
	)

	# Start serving the app with Gradio's debug mode switched on.
	intf.launch(debug=True)