mrfakename committed on
Commit
cd5b7b4
0 Parent(s):

Super-squash branch 'main' using huggingface_hub

Browse files
Files changed (5) hide show
  1. .gitattributes +35 -0
  2. README.md +13 -0
  3. app.py +307 -0
  4. packages.txt +1 -0
  5. requirements.txt +6 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: seewav-gui
3
+ emoji: 🔊
4
+ colorFrom: indigo
5
+ colorTo: indigo
6
+ sdk: gradio
7
+ sdk_version: 5.0.2
8
+ app_file: app.py
9
+ pinned: true
10
+ license: cc0-1.0
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Mostly from: https://github.com/adefossez/seewav
2
+ # Original author: adefossez
3
+
4
+
5
+ import math
6
+ import tempfile
7
+ from pathlib import Path
8
+ import subprocess
9
+ import cairo
10
+ import numpy as np
11
+ import gradio as gr
12
+ from pydub import AudioSegment
13
+
14
+
15
def read_audio(audio, seek=None, duration=None):
    """
    Read the `audio` file, starting at `seek` (or 0) seconds for `duration` (or all) seconds.

    Returns a `(wav, samplerate)` pair, where `wav` is `float32[channels, samples]` and
    `samplerate` is the frame rate reported by pydub for the decoded file.
    """
    audio_segment = AudioSegment.from_file(audio)
    channels = audio_segment.channels
    samplerate = audio_segment.frame_rate

    # pydub slices are expressed in milliseconds.
    if seek is not None:
        audio_segment = audio_segment[int(seek * 1000):]
    if duration is not None:
        audio_segment = audio_segment[:int(duration * 1000)]

    samples = audio_segment.get_array_of_samples()
    wav = np.array(samples, dtype=np.float32)
    # pydub serializes multi-channel audio frame by frame (L, R, L, R, ...). Reshape to
    # [samples, channels] first and then transpose; reshaping straight to
    # [channels, samples] would put the first half of the stream in channel 0 and the
    # second half in channel 1.
    return wav.reshape(-1, channels).T, samplerate
36
+
37
+
38
def sigmoid(x):
    """Logistic function `1 / (1 + exp(-x))`, applied element-wise by numpy."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
40
+
41
+
42
def envelope(wav, window, stride):
    """
    Compute the envelope of the waveform `wav` (float[samples]).

    The signal is zero-padded by `window // 2` on both sides, then the positive part of
    each `window`-sample frame (taken every `stride` samples) is averaged. The pooled
    values are finally squashed through a sigmoid-based compressor.
    """
    padded = np.pad(wav, window // 2)
    starts = range(0, len(padded) - window, stride)
    pooled = np.array(
        [np.maximum(padded[start : start + window], 0).mean() for start in starts]
    )
    # Some form of audio compressor based on the sigmoid.
    return 1.9 * (sigmoid(2.5 * pooled) - 0.5)
57
+
58
+
59
def draw_env(envs, out, fg_colors, bg_color, size):
    """
    Internal function: render one video frame with cairo and write it to `out` as a PNG.

    `envs` is a list of envelopes (one per channel, stacked vertically); each envelope is
    a float[bars] giving the height of every bar. `fg_colors[i]` is the rgb color of the
    i-th wave, `bg_color` the rgb background and `size` the `(width, height)` in pixels.
    """
    surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, *size)
    ctx = cairo.Context(surface)
    # Work in a unit square so every coordinate below is a fraction of the frame.
    ctx.scale(*size)

    # Paint the background.
    ctx.set_source_rgb(*bg_color)
    ctx.rectangle(0, 0, 1, 1)
    ctx.fill()

    n_waves = len(envs)  # number of waves to draw (waves are stacked vertically)
    n_steps = len(envs[0])  # number of time steps, i.e. bars per wave
    pad_ratio = 0.1  # spacing ratio between 2 bars
    bar_width = 1.0 / (n_steps * (1 + 2 * pad_ratio))
    gap = pad_ratio * bar_width
    pitch = 2 * gap + bar_width  # horizontal distance between consecutive bars

    ctx.set_line_width(bar_width)
    for step in range(n_steps):
        x = gap + step * pitch
        for i, env in enumerate(envs):
            # (semi-)height of the bar, shrunk because n_waves waves share the frame
            half = 0.5 * env[step] / n_waves
            midrule = (1 + 2 * i) / (2 * n_waves)  # vertical center of the i-th wave

            # Upper half of the bar, fully opaque.
            ctx.set_source_rgb(*fg_colors[i])
            ctx.move_to(x, midrule - half)
            ctx.line_to(x, midrule)
            ctx.stroke()

            # Lower half: slightly shorter and translucent.
            ctx.set_source_rgba(*fg_colors[i], 0.8)
            ctx.move_to(x, midrule)
            ctx.line_to(x, midrule + 0.9 * half)
            ctx.stroke()

    surface.write_to_png(out)
97
+
98
+
99
def interpole(x1, y1, x2, y2, x):
    """Evaluate at `x` the straight line passing through `(x1, y1)` and `(x2, y2)`."""
    rise = (y2 - y1) * (x - x1)
    run = x2 - x1
    return y1 + rise / run
101
+
102
+
103
def visualize(
    progress,
    audio,
    tmp,
    out,
    seek=None,
    duration=None,
    rate=60,
    bars=50,
    speed=4,
    time=0.4,
    oversample=3,
    fg_color=(0.2, 0.2, 0.2),
    fg_color2=(0.5, 0.3, 0.6),
    bg_color=(1, 1, 1),
    size=(400, 400),
    stereo=False,
):
    """
    Generate the visualisation for the `audio` file, using a `tmp` folder and saving the final
    video in `out`. Returns `out`.

    `progress` is a callable wrapping the frame iterable (e.g. a tqdm-like function).
    `tmp` and `out` are `pathlib.Path` objects; one PNG per frame is written into `tmp`.
    `seek` and `duration` give the extract location if any, in seconds.
    `rate` is the framerate of the output video.

    `bars` is the number of bars in the animation.
    `speed` is the base speed of transition. Depending on volume, actual speed will vary
        between 0.5 and 2 times it.
    `time` amount of audio shown at once on a frame.
    `oversample` higher values will lead to more frequent changes.
    `fg_color` is the rgb color to use for the foreground.
    `fg_color2` is the rgb color to use for the second wav if stereo is set.
    `bg_color` is the rgb color to use for the background.
    `size` is the `(width, height)` in pixels to generate.
    `stereo` is whether to create 2 waves.

    Raises `gr.Error` when the audio cannot be read, is too short to produce a frame, or
    when `stereo` is requested for a file that does not have exactly two channels.
    Raises `subprocess.CalledProcessError` when ffmpeg fails.
    """
    try:
        wav, sr = read_audio(audio, seek=seek, duration=duration)
    except (IOError, ValueError) as err:
        raise gr.Error(str(err)) from err

    # Callers may pass numeric values that are not `int`; the slicing below and the
    # cairo surface both need true integers.
    bars = int(bars)
    size = (int(size[0]), int(size[1]))

    # Keep the requested `duration` untouched: it is needed again for the ffmpeg call.
    clip_seconds = wav.shape[1] / sr
    frames = int(rate * clip_seconds)
    if frames < 1:
        raise gr.Error("The audio clip is too short to visualize.")

    # wavs is a list of wav over channels
    if stereo:
        if wav.shape[0] != 2:
            raise gr.Error("stereo requires stereo audio file")
        wavs = [wav[0], wav[1]]
    else:
        wavs = [wav.mean(0)]

    for i, channel in enumerate(wavs):
        std = channel.std()
        # A perfectly silent channel has a zero std; dividing would fill it with NaNs.
        wavs[i] = channel / std if std > 0 else channel

    # With many bars or a low sample rate these truncate to 0, which would make
    # `range()` in `envelope` fail (zero step) and divide by zero below.
    window = max(1, int(sr * time / bars))
    stride = max(1, int(window / oversample))

    # envs is a list of env over channels
    envs = []
    for channel in wavs:
        env = envelope(channel, window, stride)
        # Pad so that the two `bars`-wide slices read per frame never run off the ends.
        env = np.pad(env, (bars // 2, 2 * bars))
        envs.append(env)

    smooth = np.hanning(bars)

    gr.Info("Generating the frames...")
    for idx in progress(range(frames)):
        pos = (((idx / rate)) * sr) / stride / bars
        off = int(pos)
        loc = pos - off
        denvs = []
        for env in envs:
            env1 = env[off * bars : (off + 1) * bars]
            env2 = env[(off + 1) * bars : (off + 2) * bars]

            # we want loud parts to be updated faster
            maxvol = math.log10(1e-4 + env2.max()) * 10
            speedup = np.clip(interpole(-6, 0.5, 0, 2, maxvol), 0.5, 2)
            w = sigmoid(speed * speedup * (loc - 0.5))
            denv = (1 - w) * env1 + w * env2
            denv *= smooth
            denvs.append(denv)
        draw_env(denvs, tmp / f"{idx:06d}.png", (fg_color, fg_color2), bg_color, size)

    gr.Info("Encoding the animation video...")
    # Apply the same extract to the audio track as to the frames, otherwise the video
    # would carry the audio from the start of the file. The path is resolved because
    # ffmpeg runs with `cwd=tmp`, where a relative path would no longer be valid.
    audio_cmd = []
    if seek is not None:
        audio_cmd += ["-ss", str(seek)]
    audio_cmd += ["-i", str(Path(audio).resolve())]
    if duration is not None:
        audio_cmd += ["-t", str(duration)]
    subprocess.run(
        [
            "ffmpeg", "-y", "-loglevel", "panic",
            "-r", str(rate), "-f", "image2", "-s", f"{size[0]}x{size[1]}", "-i", "%06d.png",
            *audio_cmd,
            "-c:a", "aac", "-vcodec", "libx264", "-crf", "10", "-pix_fmt", "yuv420p",
            str(out.resolve()),
        ],
        check=True,
        cwd=tmp,
    )
    return out
193
+
194
+
195
+
196
def parse_color(colorstr):
    """
    Given a comma separated rgb color such as `0.2,0.2,0.2`, returns a 3-tuple of float
    `(r, g, b)`.

    Raises `gr.Error` when `colorstr` does not contain exactly three comma separated
    floats.
    """
    try:
        r, g, b = (float(part) for part in colorstr.split(","))
    except ValueError as err:
        # Covers both a non-numeric component and a wrong number of components.
        raise gr.Error(
            "Format for color is 3 floats separated by commas 0.xx,0.xx,0.xx, rgb order"
        ) from err
    return r, g, b
207
+
208
+
209
def hex_to_rgb(hex_color):
    """
    Convert a color string to an `(r, g, b)` tuple of floats in `[0, 1]`.

    Accepted forms:
    - 6 digit hex, with or without the leading `#` (`#00237E`); extra trailing digits
      such as an alpha byte are ignored,
    - 3 or 4 digit shorthand hex (`#fff`, `#fff8`), expanded by doubling each digit,
    - CSS `rgb(r, g, b)` / `rgba(r, g, b, a)` with components in `0..255`; the alpha
      component is ignored.

    Raises `ValueError` when the string cannot be parsed.
    """
    text = hex_color.strip()

    if text.lower().startswith("rgb"):
        # NOTE(review): some color pickers emit CSS functional notation instead of
        # hex; confirm which format the UI actually sends.
        inner = text[text.find("(") + 1 : text.rfind(")")]
        parts = inner.split(",")
        if len(parts) not in (3, 4):
            raise ValueError(f"Invalid color: {hex_color!r}")
        r, g, b = (float(part) / 255.0 for part in parts[:3])
        return (r, g, b)

    digits = text.lstrip("#")
    if len(digits) in (3, 4):
        # Shorthand notation: "abc" means "aabbcc".
        digits = "".join(ch * 2 for ch in digits)
    r = int(digits[0:2], 16) / 255.0
    g = int(digits[2:4], 16) / 255.0
    b = int(digits[4:6], 16) / 255.0
    return (r, g, b)
215
+
216
def do_viz(
    inp_aud,
    inp_bgcolor,
    inp_color1,
    inp_nbars,
    inp_vidw,
    inp_vidh,
    progress=gr.Progress(),
):
    """
    UI callback: render the audio file `inp_aud` as a waveform video and return the path
    of the generated `.mp4`.

    `inp_bgcolor` and `inp_color1` are color strings understood by `hex_to_rgb`,
    `inp_nbars` is the number of bars, `inp_vidw` / `inp_vidh` the video size in pixels.
    `progress` is the Gradio progress tracker; its `tqdm` wrapper reports frame
    generation.

    Raises `gr.Error` when no audio was provided.
    """
    if not inp_aud:
        raise gr.Error("Please provide an audio file first.")

    # Only reserve a unique output path here, and close the handle right away so that
    # ffmpeg is free to overwrite the file. The file is deliberately kept
    # (`delete=False`) because its path is returned to the caller.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as out:
        out_path = Path(out.name)

    # The frame PNGs live in a scratch directory that is removed once encoding is done.
    with tempfile.TemporaryDirectory() as tmp:
        return visualize(
            progress.tqdm,
            inp_aud,
            Path(tmp),
            out_path,
            # Numeric UI inputs are coerced to int: bar count and pixel sizes are
            # used for slicing and for the image dimensions.
            bars=int(inp_nbars),
            fg_color=hex_to_rgb(inp_color1),
            bg_color=hex_to_rgb(inp_bgcolor),
            size=(int(inp_vidw), int(inp_vidh)),
        )
239
+
240
+
241
+ import gradio as gr
242
+
243
# Markdown shown at the top of the page.
ABOUT = """
# seewav GUI

> Have an audio clip but need a video (e.g. for X/Twitter)?

**Convert audio into a video!**

An online graphical user interface for [seewav](https://github.com/adefossez/seewav).
"""

# NOTE(review): the layout nesting below was reconstructed from a flattened diff view;
# confirm it against the original app.py.
with gr.Blocks() as demo:
    gr.Markdown(ABOUT)
    with gr.Row():
        # Left column: inputs and settings.
        with gr.Column():
            inp_aud = gr.Audio(type='filepath')
            with gr.Group():
                inp_color1 = gr.ColorPicker(
                    value="#00237E",
                    label="Color",
                    info="Color of the top waveform",
                    interactive=True,
                )
                inp_bgcolor = gr.ColorPicker(
                    value="#000000",
                    label="Background Color",
                    info="Color of the background",
                    interactive=True,
                )
            with gr.Accordion("Advanced Configuration", open=False):
                inp_nbars = gr.Slider(
                    minimum=5,
                    maximum=1500,
                    value=50,
                    label="Num. Bars",
                    interactive=True,
                )
                inp_vidw = gr.Slider(
                    minimum=100,
                    maximum=3000,
                    value=400,
                    label="Video Width",
                    interactive=True,
                )
                inp_vidh = gr.Slider(
                    minimum=100,
                    maximum=3000,
                    value=400,
                    label="Video Height",
                    interactive=True,
                )
            inp_go = gr.Button("Visualize", variant="primary")
        # Right column: the rendered video.
        with gr.Column():
            out_vid = gr.Video(interactive=False)

    # The input order must match the positional parameters of `do_viz`.
    inp_go.click(
        do_viz,
        inputs=[inp_aud, inp_bgcolor, inp_color1, inp_nbars, inp_vidw, inp_vidh],
        outputs=[out_vid],
    )

# `queue()` returns the Blocks instance, so configuring and launching in two statements
# is equivalent to chaining the calls.
demo.queue(api_open=False, default_concurrency_limit=20)
demo.launch(show_api=False)
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ numpy
2
+ pycairo
3
+ tqdm
4
+ pydub
5
+ ffmpeg-python
6
+ opencv-python