Spaces:

alexander-lazarin
/

spanish_vowels

Sleeping

App Files Files Community

alexander-lazarin committed on Nov 5, 2023

Commit

b0de1c2

1 Parent(s): e90c04c

implement voice to image

Browse files

Files changed (2) hide show

app.py +60 -4
requirements.txt +5 -1

app.py CHANGED Viewed

@@ -1,5 +1,13 @@
 from fastai.vision.all import *
 import gradio as gr
 learn = load_learner('spanish_vowels.pkl')
@@ -9,8 +17,56 @@ def classify_image(img):
     pred, idx, probs = learn.predict(img)
     return dict(zip(categories, map(float, probs)))
-image = gr.Image()
-label = gr.Label()
-intf = gr.Interface(fn = classify_image, inputs = image, outputs = label)
-intf.launch(inline=False, share=True)

 from fastai.vision.all import *
 import gradio as gr
+import librosa
+import matplotlib.pyplot as plt
+import numpy as np
+from PIL import Image
+from matplotlib.backends.backend_agg import FigureCanvasAgg
+from fastai.vision.all import *
+from fastcore.all import *
+import os
 learn = load_learner('spanish_vowels.pkl')
     pred, idx, probs = learn.predict(img)
     return dict(zip(categories, map(float, probs)))
+def classify_voice(voice):
+    img = voice_to_image(voice)
+    pred, idx, probs = learn.predict(img)
+    return dict(zip(categories, map(float, probs))), img
+# Stash the given `voice` value in the module-level global `voice_rec` and
+# return nothing. In the visible code this function is only referenced by a
+# commented-out `gr.Interface(...)` line.
+def get_voice(voice):
+    global voice_rec
+    voice_rec = voice
+# Input component handed to gr.Interface as `inputs`.
+voice = gr.Audio()
+# Output components handed to gr.Interface as `outputs`. Despite the name this
+# is a list of two components (label, image), matching the
+# (probabilities, image) pair returned by classify_voice.
+label = [gr.Label(), gr.Image()]
+def voice_to_image(voice):
+    sample_rate, audio_data = voice
+    audio_data = audio_data.astype(np.float32) # / 32767.0
+    # Generate the spectrogram using librosa
+    spectrogram = librosa.stft(audio_data)
+    spectrogram_db = librosa.amplitude_to_db(abs(spectrogram))
+    # Create a matplotlib figure and plot the spectrogram
+    # fig = plt.figure(frameon=False, figsize=(320 / 80, 240 / 80), dpi=80)
+    fig = plt.figure(frameon=False)
+    ax = fig.add_axes([0, 0, 1, 1])
+    ax.pcolormesh(spectrogram_db, cmap="gray")
+    ax.set_axisbelow(True)
+    ax.set_xlabel("Time")
+    ax.set_ylabel("Frequency")
+    ax.set_title("Spectrogram")
+    # Remove the extra whitespace around the plot
+    fig.tight_layout(pad=0)
+    # Convert the figure to an image using the `PIL` library
+    canvas = FigureCanvasAgg(fig)
+    canvas.draw()
+    image_data = canvas.tostring_rgb()
+    width, height = fig.get_size_inches() * fig.get_dpi()
+    image = Image.frombytes("RGB", (int(width), int(height)), image_data)
+    # fig.savefig('tmp.jpg')
+    # image = Image.open('tmp.jpg')
+    # Close the figure to release memory
+    plt.close(fig)
+    return image
+# Image-input variant (the interface this file used before the voice input
+# was added), kept commented out:
+# intf = gr.Interface(fn = classify_image, inputs = image, outputs = label)
+# Active interface: audio input -> classify_voice -> (label, image) outputs.
+intf = gr.Interface(fn = classify_voice, inputs = voice, outputs = label)
+# Alternative wiring that only stores the audio value via get_voice, kept
+# commented out:
+# intf = gr.Interface(fn = get_voice, inputs = voice, outputs = label)
+# Start the app at import time with debug mode and a share link requested.
+intf.launch(debug=True, share=True)

requirements.txt CHANGED Viewed

	@@ -1 +1,5 @@
1	- fastai

+fastai
+librosa
+matplotlib
+numpy
+pillow