from fastai.vision.all import *
from fastcore.all import *
import gradio as gr
import librosa
import matplotlib.pyplot as plt
import numpy as np
import os
from PIL import Image
from matplotlib.backends.backend_agg import FigureCanvasAgg
learn = load_learner('spanish_vowels.pkl')
categories = ('a', 'e', 'i', 'o', 'u')
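# Note: 'spanish_vowels.pkl' is assumed here to be a fastai image classifier
# trained on vowel spectrogram images and saved with learn.export(); the file
# must sit next to this script for load_learner() to find it.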
def classify_image(img):
    pred, idx, probs = learn.predict(img)
    return dict(zip(categories, map(float, probs)))
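# Hypothetical usage: classify_image(PILImage.create('a_vowel.jpg')) returns a
# probability per vowel category, e.g. {'a': 0.91, 'e': 0.04, ...}.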
def classify_voice(voice):
    img = voice_to_image(voice)
    pred, idx, probs = learn.predict(img)
    return dict(zip(categories, map(float, probs))), img
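# The two return values feed the two Gradio outputs defined below: the dict of
# probabilities populates gr.Label() and the spectrogram is shown in gr.Image().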
def get_voice(voice):
    # Unused helper kept from earlier experiments: it only stores the recording
    # in a global (see the commented-out gr.Interface at the bottom).
    global voice_rec
    voice_rec = voice

# Gradio components: record/upload audio in, label probabilities and image out.
voice = gr.Audio(type='filepath')
label = [gr.Label(), gr.Image()]
def voice_to_image(voice):
    audio_data, sample_rate = librosa.load(voice)
    # Generate the spectrogram using librosa
    spectrogram = librosa.stft(audio_data)
    spectrogram_db = librosa.amplitude_to_db(abs(spectrogram))
    # Create a matplotlib figure and plot the spectrogram
    # fig = plt.figure(frameon=False, figsize=(320 / 80, 240 / 80), dpi=80)
    fig = plt.figure(frameon=False)
    ax = fig.add_axes([0, 0, 1, 1])
    ax.pcolormesh(spectrogram_db, cmap="gray")
    ax.set_axisbelow(True)
    ax.set_xlabel("Time")
    ax.set_ylabel("Frequency")
    ax.set_title("Spectrogram")
    # Remove the extra whitespace around the plot
    fig.tight_layout(pad=0)
    # Convert the figure to an image using the `PIL` library
    canvas = FigureCanvasAgg(fig)
    canvas.draw()
    image_data = canvas.tostring_rgb()
    width, height = fig.get_size_inches() * fig.get_dpi()
    image = Image.frombytes("RGB", (int(width), int(height)), image_data)
    # fig.savefig('tmp.jpg')
    # image = Image.open('tmp.jpg')
    # Close the figure to release memory
    plt.close(fig)
    return image
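# Minimal local check (hypothetical file name), useful before launching the app:
# uncomment to render one recording with the same pipeline the learner sees.
# voice_to_image('sample_a.wav').save('sample_a_spectrogram.png')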
# intf = gr.Interface(fn = classify_image, inputs = image, outputs = label)
intf = gr.Interface(fn = classify_voice, inputs = voice, outputs = label)
# intf = gr.Interface(fn = get_voice, inputs = voice, outputs = label)

intf.launch(debug=True)
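# debug=True keeps the server in the foreground and prints stack traces to the
# console; on Hugging Face Spaces the app is served automatically on startup.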