alexander-lazarin committed on
Commit
b0de1c2
·
1 Parent(s): e90c04c

implement voice to image

Browse files
Files changed (2) hide show
  1. app.py +60 -4
  2. requirements.txt +5 -1
app.py CHANGED
@@ -1,5 +1,13 @@
1
  from fastai.vision.all import *
2
  import gradio as gr
 
 
 
 
 
 
 
 
3
 
4
  learn = load_learner('spanish_vowels.pkl')
5
 
@@ -9,8 +17,56 @@ def classify_image(img):
9
  pred, idx, probs = learn.predict(img)
10
  return dict(zip(categories, map(float, probs)))
11
 
12
- image = gr.Image()
13
- label = gr.Label()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- intf = gr.Interface(fn = classify_image, inputs = image, outputs = label)
16
- intf.launch(inline=False, share=True)
 
 
 
1
  from fastai.vision.all import *
2
  import gradio as gr
3
+ import librosa
4
+ import matplotlib.pyplot as plt
5
+ import numpy as np
6
+ from PIL import Image
7
+ from matplotlib.backends.backend_agg import FigureCanvasAgg
8
+ from fastai.vision.all import *
9
+ from fastcore.all import *
10
+ import os
11
 
12
  learn = load_learner('spanish_vowels.pkl')
13
 
 
17
  pred, idx, probs = learn.predict(img)
18
  return dict(zip(categories, map(float, probs)))
19
 
20
def classify_voice(voice):
    """Classify a recording by running the image model on its spectrogram.

    Args:
        voice: The audio payload, passed unchanged to ``voice_to_image``.

    Returns:
        A 2-tuple ``(scores, img)``: ``scores`` maps each entry of the
        module-level ``categories`` to its score as a plain ``float``, and
        ``img`` is the spectrogram image that was fed to the model.
    """
    img = voice_to_image(voice)
    # learn.predict returns three values; only the third (one score per
    # category) is needed here.
    _, _, probs = learn.predict(img)
    return dict(zip(categories, map(float, probs))), img
24
+
25
def get_voice(voice):
    """Store the given audio payload in the module-level ``voice_rec``.

    Nothing is returned; the payload is only stashed so it can be inspected
    afterwards.

    Args:
        voice: The audio payload to keep; stored as-is, without copying.
    """
    global voice_rec
    voice_rec = voice
28
+
29
# Input and output components for the Gradio interface. Two outputs are
# declared because classify_voice returns two values: the per-category scores
# (shown in the Label) and the spectrogram image (shown in the Image).
voice = gr.Audio()
label = [gr.Label(), gr.Image()]
31
+
32
def voice_to_image(voice):
    """Render an audio clip as a grayscale spectrogram image.

    Args:
        voice: A ``(sample_rate, samples)`` pair. ``samples`` is converted to
            a float32 array; a 2-D array is assumed to be laid out as
            ``(samples, channels)`` (TODO confirm against the Gradio version
            in use) and is averaged down to mono.

    Returns:
        A ``PIL.Image.Image`` in RGB mode holding the rendered spectrogram.

    Raises:
        ValueError: If ``voice`` is ``None`` or the clip has no samples.
    """
    if voice is None:
        raise ValueError("No audio was provided; record or upload a clip first.")

    # The sample rate is not needed: the STFT below works on raw sample indices.
    _sample_rate, audio_data = voice
    # Values are deliberately NOT rescaled to [-1, 1]; only the dtype changes.
    audio_data = np.asarray(audio_data, dtype=np.float32)
    if audio_data.ndim > 1:
        # librosa.stft treats the LAST axis as time, so a (samples, channels)
        # array must be collapsed to one channel before the transform.
        audio_data = audio_data.mean(axis=1)
    if audio_data.size == 0:
        raise ValueError("The audio clip is empty.")

    # Generate the spectrogram using librosa
    spectrogram = librosa.stft(audio_data)
    spectrogram_db = librosa.amplitude_to_db(np.abs(spectrogram))

    # Create a matplotlib figure and plot the spectrogram
    fig = plt.figure(frameon=False)
    try:
        # Axes span the whole figure so the mesh fills the image.
        ax = fig.add_axes([0, 0, 1, 1])
        ax.pcolormesh(spectrogram_db, cmap="gray")
        ax.set_axisbelow(True)
        ax.set_xlabel("Time")
        ax.set_ylabel("Frequency")
        ax.set_title("Spectrogram")

        # Remove the extra whitespace around the plot
        fig.tight_layout(pad=0)

        # Rasterize with the Agg backend and read the pixels back.
        # buffer_rgba() replaces the deprecated tostring_rgb(); the alpha
        # channel is dropped so the result is still an RGB image.
        canvas = FigureCanvasAgg(fig)
        canvas.draw()
        rgba = np.asarray(canvas.buffer_rgba())
        image = Image.fromarray(rgba).convert("RGB")
    finally:
        # Close the figure to release memory, even if rendering failed.
        plt.close(fig)

    return image
68
 
69
# Wire the voice classifier into a Gradio interface and start serving it.
# Alternative wirings, kept for reference:
# intf = gr.Interface(fn = classify_image, inputs = image, outputs = label)
# intf = gr.Interface(fn = get_voice, inputs = voice, outputs = label)
intf = gr.Interface(fn=classify_voice, inputs=voice, outputs=label)
intf.launch(debug=True, share=True)
requirements.txt CHANGED
@@ -1 +1,5 @@
1
- fastai
 
 
 
 
 
1
+ fastai
2
+ librosa
3
+ matplotlib
4
+ numpy
5
+ pillow