Spaces:
Sleeping
Sleeping
Commit
·
b0de1c2
1
Parent(s):
e90c04c
implement voice to image
Browse files- app.py +60 -4
- requirements.txt +5 -1
app.py
CHANGED
@@ -1,5 +1,13 @@
|
|
1 |
from fastai.vision.all import *
|
2 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
learn = load_learner('spanish_vowels.pkl')
|
5 |
|
@@ -9,8 +17,56 @@ def classify_image(img):
|
|
9 |
pred, idx, probs = learn.predict(img)
|
10 |
return dict(zip(categories, map(float, probs)))
|
11 |
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
-
intf = gr.Interface(fn = classify_image, inputs = image, outputs = label)
|
16 |
-
intf.
|
|
|
|
|
|
1 |
from fastai.vision.all import *
|
2 |
import gradio as gr
|
3 |
+
import librosa
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import numpy as np
|
6 |
+
from PIL import Image
|
7 |
+
from matplotlib.backends.backend_agg import FigureCanvasAgg
|
8 |
+
from fastai.vision.all import *
|
9 |
+
from fastcore.all import *
|
10 |
+
import os
|
11 |
|
12 |
learn = load_learner('spanish_vowels.pkl')
|
13 |
|
|
|
17 |
pred, idx, probs = learn.predict(img)
|
18 |
return dict(zip(categories, map(float, probs)))
|
19 |
|
20 |
+
def classify_voice(voice):
    """Classify a voice recording by way of its spectrogram image.

    The recording is rendered to an image with ``voice_to_image`` and that
    image is handed to ``learn.predict``.

    Returns:
        A 2-tuple ``(scores, image)``: ``scores`` maps each entry of
        ``categories`` to its predicted probability as a ``float``, and
        ``image`` is the spectrogram that was classified.
    """
    spectrogram_img = voice_to_image(voice)
    # Only the probabilities are used; the predicted class and its index
    # are discarded.
    _pred, _idx, probs = learn.predict(spectrogram_img)
    scores = {category: float(prob) for category, prob in zip(categories, probs)}
    return scores, spectrogram_img
|
24 |
+
|
25 |
+
def get_voice(voice):
    """Store *voice* in the module-level global ``voice_rec``.

    Returns ``None``; the only effect is the (re)binding of ``voice_rec``.
    """
    globals()["voice_rec"] = voice
|
28 |
+
|
29 |
+
# Gradio components: one audio input, and an output pair (label, image)
# matching the 2-tuple that classify_voice returns.
voice = gr.Audio()
label = [gr.Label(), gr.Image()]
|
31 |
+
|
32 |
+
def voice_to_image(voice):
    """Render a recording as a grayscale spectrogram image.

    Args:
        voice: A ``(sample_rate, audio_data)`` pair. ``audio_data`` is an
            array of samples, either 1-D (mono) or 2-D. A 2-D array is
            assumed to be laid out as ``(samples, channels)``, as Gradio's
            numpy audio format documents; confirm if another source is used.
            ``sample_rate`` is not used.

    Returns:
        A ``PIL.Image.Image`` in ``RGB`` mode containing the rendered
        spectrogram, at the figure's default size and DPI.
    """
    _sample_rate, audio_data = voice
    # Samples are converted to float32 but deliberately left un-normalised.
    audio_data = np.asarray(audio_data, dtype=np.float32)

    # librosa.stft treats the LAST axis as time. Feeding it a
    # (samples, channels) array would run the transform across channels,
    # so multi-channel input is down-mixed to mono first.
    if audio_data.ndim > 1:
        audio_data = audio_data.mean(axis=1)

    # Magnitude spectrogram on a decibel scale.
    spectrogram = librosa.stft(audio_data)
    spectrogram_db = librosa.amplitude_to_db(np.abs(spectrogram))

    fig = plt.figure(frameon=False)
    try:
        # A single axes filling the whole figure, so the image is all plot.
        ax = fig.add_axes([0, 0, 1, 1])
        ax.pcolormesh(spectrogram_db, cmap="gray")
        # NOTE(review): the axes cover the full figure, so these labels and
        # the title presumably fall outside the visible area. They are kept
        # so the rendering calls match what the model has been fed so far.
        ax.set_axisbelow(True)
        ax.set_xlabel("Time")
        ax.set_ylabel("Frequency")
        ax.set_title("Spectrogram")
        fig.tight_layout(pad=0)

        # Rasterise with the Agg backend and read the pixels back.
        # buffer_rgba() replaces tostring_rgb(), which is deprecated and
        # removed in recent Matplotlib releases. Taking the size from the
        # buffer itself avoids rounding mismatches from inches * dpi.
        canvas = FigureCanvasAgg(fig)
        canvas.draw()
        rgba = np.asarray(canvas.buffer_rgba())
        # Drop alpha and copy, so the image does not alias the canvas
        # buffer once the figure is closed.
        image = Image.fromarray(np.ascontiguousarray(rgba[..., :3]))
    finally:
        # Always unregister the figure from pyplot, even if rendering
        # fails, so repeated requests do not accumulate open figures.
        plt.close(fig)

    return image
|
68 |
|
69 |
+
# Build and serve the voice classifier UI: audio in, (label, image) out.
intf = gr.Interface(fn=classify_voice, inputs=voice, outputs=label)
intf.launch(debug=True, share=True)
|
requirements.txt
CHANGED
@@ -1 +1,5 @@
|
|
1 |
-
fastai
|
|
|
|
|
|
|
|
|
|
1 |
+
fastai
|
2 |
+
librosa
|
3 |
+
matplotlib
|
4 |
+
numpy
|
5 |
+
pillow
|