Spaces:
Sleeping
Sleeping
File size: 2,125 Bytes
f2d2799 a99fdb4 b0de1c2 a99fdb4 f2d2799 a99fdb4 f2d2799 b0de1c2 e24d3c1 b0de1c2 e24d3c1 b0de1c2 f2d2799 b0de1c2 3225caa dff31f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
from fastai.vision.all import *
import gradio as gr
import librosa
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from matplotlib.backends.backend_agg import FigureCanvasAgg
from fastai.vision.all import *
from fastcore.all import *
import os
# Load the exported learner once at import time so every request reuses it.
# NOTE(review): load_learner is understood to unpickle the file, so only a
# trusted 'spanish_vowels.pkl' should be shipped next to this script - confirm.
learn = load_learner("spanish_vowels.pkl")

# Labels paired position-by-position with the probabilities from learn.predict.
categories = (
    "a",
    "e",
    "i",
    "o",
    "u",
)
def classify_image(img):
    """Classify an image with the module-level learner.

    Args:
        img: Image to classify; handed unchanged to ``learn.predict``.

    Returns:
        A dict mapping each entry of ``categories`` to the corresponding
        value of the probabilities returned by ``learn.predict``, as a float.
    """
    # Only the probabilities are used; the other two return values of
    # learn.predict are discarded.
    _, _, probs = learn.predict(img)
    # NOTE(review): assumes `categories` is listed in the same order as the
    # learner's output classes - confirm against the training vocabulary.
    return {category: float(prob) for category, prob in zip(categories, probs)}
def classify_voice(voice):
    """Classify an audio recording by way of its spectrogram image.

    Args:
        voice: Audio input forwarded unchanged to ``voice_to_image``.

    Returns:
        A ``(scores, img)`` tuple: ``scores`` maps each entry of
        ``categories`` to the corresponding probability from
        ``learn.predict`` as a float, and ``img`` is the spectrogram image
        that was classified.
    """
    img = voice_to_image(voice)
    # Only the probabilities are used; the other two return values of
    # learn.predict are discarded.
    _, _, probs = learn.predict(img)
    # NOTE(review): assumes `categories` is listed in the same order as the
    # learner's output classes - confirm against the training vocabulary.
    scores = {category: float(prob) for category, prob in zip(categories, probs)}
    return scores, img
def get_voice(voice):
    """Remember *voice* in the module-level ``voice_rec`` variable.

    Args:
        voice: Value to store; it is saved as-is, with no validation.

    Returns:
        None. The only effect is rebinding the global ``voice_rec``.
    """
    global voice_rec
    voice_rec = voice
# Input component: audio, configured with type='filepath'.
voice = gr.Audio(type="filepath")

# Output components, in the order classify_voice returns its values:
# a label widget for the scores dict, then an image widget for the spectrogram.
label = [
    gr.Label(),
    gr.Image(),
]
def voice_to_image(voice):
    """Render an audio file as a grayscale spectrogram image.

    Args:
        voice: Audio source passed straight to ``librosa.load`` (the UI
            component in this file is configured with ``type='filepath'``).

    Returns:
        A ``PIL.Image.Image`` in RGB mode containing the rendered figure.
    """
    # The sample rate returned by librosa.load is not needed for the plot.
    audio_data, _ = librosa.load(voice)

    # Short-time Fourier transform magnitudes, converted to decibels.
    spectrogram = librosa.stft(audio_data)
    spectrogram_db = librosa.amplitude_to_db(np.abs(spectrogram))

    fig = plt.figure(frameon=False)
    try:
        # One axes spanning the whole figure area.
        ax = fig.add_axes([0, 0, 1, 1])
        ax.pcolormesh(spectrogram_db, cmap="gray")
        ax.set_axisbelow(True)
        ax.set_xlabel("Time")
        ax.set_ylabel("Frequency")
        ax.set_title("Spectrogram")
        # NOTE(review): the axes were created with add_axes rather than as a
        # subplot; confirm whether tight_layout has any effect here. The call
        # is kept so the rendered pixels stay the same as before.
        fig.tight_layout(pad=0)

        # Rasterize with the Agg backend and read the pixels back.
        canvas = FigureCanvasAgg(fig)
        canvas.draw()
        # buffer_rgba() replaces tostring_rgb(), which newer Matplotlib
        # releases no longer provide. Dropping the alpha channel yields the
        # same RGB bytes, and the image size comes from the buffer itself
        # instead of being recomputed from inches * dpi.
        rgba = np.asarray(canvas.buffer_rgba())
        return Image.fromarray(np.ascontiguousarray(rgba[..., :3]))
    finally:
        # Always unregister the figure from pyplot, even when plotting or
        # drawing fails, so repeated requests do not accumulate figures.
        plt.close(fig)
# Wire the voice classifier into the UI: audio in, (scores, spectrogram) out.
# Earlier wirings tried here, kept as a note only:
#   - fn=classify_image with an `image` input component
#   - fn=get_voice with the same `voice` input
intf = gr.Interface(
    fn=classify_voice,
    inputs=voice,
    outputs=label,
)

intf.launch(debug=True)
|