Eddycrack864 committed on
Commit
9e06c0e
1 Parent(s): 3da398c

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +524 -524
app.py CHANGED
@@ -1,524 +1,524 @@
1
- import os
2
- import re
3
- import random
4
- from scipy.io.wavfile import write
5
- import gradio as gr
6
-
7
- roformer_models = {
8
- 'BS-Roformer-Viperx-1297.ckpt': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
9
- 'BS-Roformer-Viperx-1296.ckpt': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
10
- 'BS-Roformer-Viperx-1053.ckpt': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
11
- 'Mel-Roformer-Viperx-1143.ckpt': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt'
12
- }
13
-
14
- mdx23c_models = [
15
- 'MDX23C_D1581.ckpt',
16
- 'MDX23C-8KFFT-InstVoc_HQ.ckpt',
17
- 'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
18
- ]
19
-
20
- mdxnet_models = [
21
- 'UVR-MDX-NET-Inst_full_292.onnx',
22
- 'UVR-MDX-NET_Inst_187_beta.onnx',
23
- 'UVR-MDX-NET_Inst_82_beta.onnx',
24
- 'UVR-MDX-NET_Inst_90_beta.onnx',
25
- 'UVR-MDX-NET_Main_340.onnx',
26
- 'UVR-MDX-NET_Main_390.onnx',
27
- 'UVR-MDX-NET_Main_406.onnx',
28
- 'UVR-MDX-NET_Main_427.onnx',
29
- 'UVR-MDX-NET_Main_438.onnx',
30
- 'UVR-MDX-NET-Inst_HQ_1.onnx',
31
- 'UVR-MDX-NET-Inst_HQ_2.onnx',
32
- 'UVR-MDX-NET-Inst_HQ_3.onnx',
33
- 'UVR-MDX-NET-Inst_HQ_4.onnx',
34
- 'UVR_MDXNET_Main.onnx',
35
- 'UVR-MDX-NET-Inst_Main.onnx',
36
- 'UVR_MDXNET_1_9703.onnx',
37
- 'UVR_MDXNET_2_9682.onnx',
38
- 'UVR_MDXNET_3_9662.onnx',
39
- 'UVR-MDX-NET-Inst_1.onnx',
40
- 'UVR-MDX-NET-Inst_2.onnx',
41
- 'UVR-MDX-NET-Inst_3.onnx',
42
- 'UVR_MDXNET_KARA.onnx',
43
- 'UVR_MDXNET_KARA_2.onnx',
44
- 'UVR_MDXNET_9482.onnx',
45
- 'UVR-MDX-NET-Voc_FT.onnx',
46
- 'Kim_Vocal_1.onnx',
47
- 'Kim_Vocal_2.onnx',
48
- 'Kim_Inst.onnx',
49
- 'Reverb_HQ_By_FoxJoy.onnx',
50
- 'UVR-MDX-NET_Crowd_HQ_1.onnx',
51
- 'kuielab_a_vocals.onnx',
52
- 'kuielab_a_other.onnx',
53
- 'kuielab_a_bass.onnx',
54
- 'kuielab_a_drums.onnx',
55
- 'kuielab_b_vocals.onnx',
56
- 'kuielab_b_other.onnx',
57
- 'kuielab_b_bass.onnx',
58
- 'kuielab_b_drums.onnx',
59
- ]
60
-
61
- vrarch_models = [
62
- '1_HP-UVR.pth',
63
- '2_HP-UVR.pth',
64
- '3_HP-Vocal-UVR.pth',
65
- '4_HP-Vocal-UVR.pth',
66
- '5_HP-Karaoke-UVR.pth',
67
- '6_HP-Karaoke-UVR.pth',
68
- '7_HP2-UVR.pth',
69
- '8_HP2-UVR.pth',
70
- '9_HP2-UVR.pth',
71
- '10_SP-UVR-2B-32000-1.pth',
72
- '11_SP-UVR-2B-32000-2.pth',
73
- '12_SP-UVR-3B-44100.pth',
74
- '13_SP-UVR-4B-44100-1.pth',
75
- '14_SP-UVR-4B-44100-2.pth',
76
- '15_SP-UVR-MID-44100-1.pth',
77
- '16_SP-UVR-MID-44100-2.pth',
78
- '17_HP-Wind_Inst-UVR.pth',
79
- 'UVR-De-Echo-Aggressive.pth',
80
- 'UVR-De-Echo-Normal.pth',
81
- 'UVR-DeEcho-DeReverb.pth',
82
- 'UVR-DeNoise-Lite.pth',
83
- 'UVR-DeNoise.pth',
84
- 'UVR-BVE-4B_SN-44100-1.pth',
85
- 'MGM_HIGHEND_v4.pth',
86
- 'MGM_LOWEND_A_v4.pth',
87
- 'MGM_LOWEND_B_v4.pth',
88
- 'MGM_MAIN_v4.pth',
89
- ]
90
-
91
- demucs_models = [
92
- 'htdemucs_ft.yaml',
93
- 'htdemucs.yaml',
94
- 'hdemucs_mmi.yaml',
95
- ]
96
-
97
- output_format = [
98
- 'wav',
99
- 'flac',
100
- 'mp3',
101
- ]
102
-
103
- mdxnet_overlap_values = [
104
- '0.25',
105
- '0.5',
106
- '0.75',
107
- '0.99',
108
- ]
109
-
110
- vrarch_window_size_values = [
111
- '320',
112
- '512',
113
- '1024',
114
- ]
115
-
116
- def roformer_separator(roformer_audio, roformer_model, roformer_output_format, roformer_overlap):
117
- files_list = []
118
- files_list.clear()
119
- directory = "./outputs"
120
- random_id = str(random.randint(10000, 99999))
121
- pattern = f"{random_id}"
122
- os.makedirs("outputs", exist_ok=True)
123
- write(f'{random_id}.wav', roformer_audio[0], roformer_audio[1])
124
- full_roformer_model = roformer_models[roformer_model]
125
- prompt = f"audio-separator {random_id}.wav --model_filename {full_roformer_model} --output_dir=./outputs --output_format={roformer_output_format} --normalization=0.9 --mdxc_overlap={roformer_overlap}"
126
- os.system(prompt)
127
-
128
- for file in os.listdir(directory):
129
- if re.search(pattern, file):
130
- files_list.append(os.path.join(directory, file))
131
-
132
- stem1_file = files_list[0]
133
- stem2_file = files_list[1]
134
-
135
- return stem1_file, stem2_file
136
-
137
- def mdxc_separator(mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap):
138
- files_list = []
139
- files_list.clear()
140
- directory = "./outputs"
141
- random_id = str(random.randint(10000, 99999))
142
- pattern = f"{random_id}"
143
- os.makedirs("outputs", exist_ok=True)
144
- write(f'{random_id}.wav', mdx23c_audio[0], mdx23c_audio[1])
145
- prompt = f"audio-separator {random_id}.wav --model_filename {mdx23c_model} --output_dir=./outputs --output_format={mdx23c_output_format} --normalization=0.9 --mdxc_segment_size={mdx23c_segment_size} --mdxc_overlap={mdx23c_overlap}"
146
- os.system(prompt)
147
-
148
- for file in os.listdir(directory):
149
- if re.search(pattern, file):
150
- files_list.append(os.path.join(directory, file))
151
-
152
- stem1_file = files_list[0]
153
- stem2_file = files_list[1]
154
-
155
- return stem1_file, stem2_file
156
-
157
- def mdxnet_separator(mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise):
158
- files_list = []
159
- files_list.clear()
160
- directory = "./outputs"
161
- random_id = str(random.randint(10000, 99999))
162
- pattern = f"{random_id}"
163
- os.makedirs("outputs", exist_ok=True)
164
- write(f'{random_id}.wav', mdxnet_audio[0], mdxnet_audio[1])
165
- prompt = f"audio-separator {random_id}.wav --model_filename {mdxnet_model} --output_dir=./outputs --output_format={mdxnet_output_format} --normalization=0.9 --mdxc_segment_size={mdxnet_segment_size} --mdxc_overlap={mdxnet_overlap}"
166
-
167
- if mdxnet_denoise:
168
- prompt += " --mdx_enable_denoise"
169
-
170
- os.system(prompt)
171
-
172
- for file in os.listdir(directory):
173
- if re.search(pattern, file):
174
- files_list.append(os.path.join(directory, file))
175
-
176
- stem1_file = files_list[0]
177
- stem2_file = files_list[1]
178
-
179
- return stem1_file, stem2_file
180
-
181
- def vrarch_separator(vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process):
182
- files_list = []
183
- files_list.clear()
184
- directory = "./outputs"
185
- random_id = str(random.randint(10000, 99999))
186
- pattern = f"{random_id}"
187
- os.makedirs("outputs", exist_ok=True)
188
- write(f'{random_id}.wav', vrarch_audio[0], vrarch_audio[1])
189
- prompt = f"audio-separator {random_id}.wav --model_filename {vrarch_model} --output_dir=./outputs --output_format={vrarch_output_format} --normalization=0.9 --vr_window_size={vrarch_window_size} --vr_aggression={vrarch_agression}"
190
-
191
- if vrarch_tta:
192
- prompt += " --vr_enable_tta"
193
- if vrarch_high_end_process:
194
- prompt += " --vr_high_end_process"
195
-
196
- os.system(prompt)
197
-
198
- for file in os.listdir(directory):
199
- if re.search(pattern, file):
200
- files_list.append(os.path.join(directory, file))
201
-
202
- stem1_file = files_list[0]
203
- stem2_file = files_list[1]
204
-
205
- return stem1_file, stem2_file
206
-
207
- def demucs_separator(demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap):
208
- files_list = []
209
- files_list.clear()
210
- directory = "./outputs"
211
- random_id = str(random.randint(10000, 99999))
212
- pattern = f"{random_id}"
213
- os.makedirs("outputs", exist_ok=True)
214
- write(f'{random_id}.wav', demucs_audio[0], demucs_audio[1])
215
- prompt = f"audio-separator {random_id}.wav --model_filename {demucs_model} --output_dir=./outputs --output_format={demucs_output_format} --normalization=0.9 --demucs_shifts={demucs_shifts} --demucs_overlap={demucs_overlap}"
216
-
217
- os.system(prompt)
218
-
219
- for file in os.listdir(directory):
220
- if re.search(pattern, file):
221
- files_list.append(os.path.join(directory, file))
222
-
223
- stem1_file = files_list[0]
224
- stem2_file = files_list[1]
225
- stem3_file = files_list[2]
226
- stem4_file = files_list[3]
227
-
228
- return stem1_file, stem2_file, stem3_file, stem4_file
229
-
230
- with gr.Blocks(title="🎵 UVR5 UI 🎵") as app:
231
- gr.Markdown("<h1> 🎵 UVR5 UI 🎵 </h1>")
232
- with gr.Tabs():
233
- with gr.TabItem("BS/Mel Roformer"):
234
- with gr.Row():
235
- roformer_model = gr.Dropdown(
236
- label = "Select the Model",
237
- choices=list(roformer_models.keys()),
238
- interactive = True
239
- )
240
- roformer_output_format = gr.Dropdown(
241
- label = "Select the Output Format",
242
- choices = output_format,
243
- interactive = True
244
- )
245
- with gr.Row():
246
- roformer_overlap = gr.Slider(
247
- minimum = 2,
248
- maximum = 4,
249
- step = 1,
250
- label = "Overlap",
251
- info = "Amount of overlap between prediction windows.",
252
- value = 4,
253
- interactive = True
254
- )
255
- with gr.Row():
256
- roformer_audio = gr.Audio(
257
- label = "Input Audio",
258
- type = "numpy",
259
- interactive = True
260
- )
261
- with gr.Row():
262
- roformer_button = gr.Button("Separate!", variant = "primary")
263
- with gr.Row():
264
- roformer_stem1 = gr.Audio(
265
- show_download_button = True,
266
- interactive = False,
267
- label = "Stem 1",
268
- type = "filepath"
269
- )
270
- roformer_stem2 = gr.Audio(
271
- show_download_button = True,
272
- interactive = False,
273
- label = "Stem 2",
274
- type = "filepath"
275
- )
276
-
277
- roformer_button.click(roformer_separator, [roformer_audio, roformer_model, roformer_output_format, roformer_overlap], [roformer_stem1, roformer_stem2])
278
-
279
- with gr.TabItem("MDX23C"):
280
- with gr.Row():
281
- mdx23c_model = gr.Dropdown(
282
- label = "Select the Model",
283
- choices = mdx23c_models,
284
- interactive = True
285
- )
286
- mdx23c_output_format = gr.Dropdown(
287
- label = "Select the Output Format",
288
- choices = output_format,
289
- interactive = True
290
- )
291
- with gr.Row():
292
- mdx23c_segment_size = gr.Slider(
293
- minimum = 32,
294
- maximum = 4000,
295
- step = 32,
296
- label = "Segment Size",
297
- info = "Larger consumes more resources, but may give better results.",
298
- value = 256,
299
- interactive = True
300
- )
301
- mdx23c_overlap = gr.Slider(
302
- minimum = 2,
303
- maximum = 50,
304
- step = 1,
305
- label = "Overlap",
306
- info = "Amount of overlap between prediction windows.",
307
- value = 8,
308
- interactive = True
309
- )
310
- with gr.Row():
311
- mdx23c_audio = gr.Audio(
312
- label = "Input Audio",
313
- type = "numpy",
314
- interactive = True
315
- )
316
- with gr.Row():
317
- mdx23c_button = gr.Button("Separate!", variant = "primary")
318
- with gr.Row():
319
- mdx23c_stem1 = gr.Audio(
320
- show_download_button = True,
321
- interactive = False,
322
- label = "Stem 1",
323
- type = "filepath"
324
- )
325
- mdx23c_stem2 = gr.Audio(
326
- show_download_button = True,
327
- interactive = False,
328
- label = "Stem 2",
329
- type = "filepath"
330
- )
331
-
332
- mdx23c_button.click(mdxc_separator, [mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap], [mdx23c_stem1, mdx23c_stem2])
333
-
334
- with gr.TabItem("MDX-NET"):
335
- with gr.Row():
336
- mdxnet_model = gr.Dropdown(
337
- label = "Select the Model",
338
- choices = mdxnet_models,
339
- interactive = True
340
- )
341
- mdxnet_output_format = gr.Dropdown(
342
- label = "Select the Output Format",
343
- choices = output_format,
344
- interactive = True
345
- )
346
- with gr.Row():
347
- mdxnet_segment_size = gr.Slider(
348
- minimum = 32,
349
- maximum = 4000,
350
- step = 32,
351
- label = "Segment Size",
352
- info = "Larger consumes more resources, but may give better results.",
353
- value = 256,
354
- interactive = True
355
- )
356
- mdxnet_overlap = gr.Dropdown(
357
- label = "Overlap",
358
- choices = mdxnet_overlap_values,
359
- value = mdxnet_overlap_values[0],
360
- interactive = True
361
- )
362
- mdxnet_denoise = gr.Checkbox(
363
- label = "Denoise",
364
- info = "Enable denoising during separation.",
365
- value = True,
366
- interactive = True
367
- )
368
- with gr.Row():
369
- mdxnet_audio = gr.Audio(
370
- label = "Input Audio",
371
- type = "numpy",
372
- interactive = True
373
- )
374
- with gr.Row():
375
- mdxnet_button = gr.Button("Separate", variant = "primary")
376
- with gr.Row():
377
- mdxnet_stem1 = gr.Audio(
378
- show_download_button = True,
379
- interactive = False,
380
- label = "Stem 1",
381
- type = "filepath"
382
- )
383
- mdxnet_stem2 = gr.Audio(
384
- show_download_button = True,
385
- interactive = False,
386
- label = "Stem 2",
387
- type = "filepath"
388
- )
389
-
390
- mdxnet_button.click(mdxnet_separator, [mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise], [mdxnet_stem1, mdxnet_stem2])
391
-
392
- with gr.TabItem("VR ARCH"):
393
- with gr.Row():
394
- vrarch_model = gr.Dropdown(
395
- label = "Select the Model",
396
- choices = vrarch_models,
397
- interactive = True
398
- )
399
- vrarch_output_format = gr.Dropdown(
400
- label = "Select the Output Format",
401
- choices = output_format,
402
- interactive = True
403
- )
404
- with gr.Row():
405
- vrarch_window_size = gr.Dropdown(
406
- label = "Window Size",
407
- choices = vrarch_window_size_values,
408
- value = vrarch_window_size_values[0],
409
- interactive = True
410
- )
411
- vrarch_agression = gr.Slider(
412
- minimum = 1,
413
- maximum = 50,
414
- step = 1,
415
- label = "Agression",
416
- info = "Intensity of primary stem extraction.",
417
- value = 5,
418
- interactive = True
419
- )
420
- vrarch_tta = gr.Checkbox(
421
- label = "TTA",
422
- info = "Enable Test-Time-Augmentation; slow but improves quality.",
423
- value = True,
424
- interactive = True
425
- )
426
- vrarch_high_end_process = gr.Checkbox(
427
- label = "High End Process",
428
- info = "Mirror the missing frequency range of the output.",
429
- value = False,
430
- interactive = True
431
- )
432
- with gr.Row():
433
- vrarch_audio = gr.Audio(
434
- label = "Input Audio",
435
- type = "numpy",
436
- interactive = True
437
- )
438
- with gr.Row():
439
- vrarch_button = gr.Button("Separate", variant = "primary")
440
- with gr.Row():
441
- vrarch_stem1 = gr.Audio(
442
- show_download_button = True,
443
- interactive = False,
444
- type = "filepath",
445
- label = "Stem 1"
446
- )
447
- vrarch_stem2 = gr.Audio(
448
- show_download_button = True,
449
- interactive = False,
450
- type = "filepath",
451
- label = "Stem 2"
452
- )
453
-
454
- vrarch_button.click(vrarch_separator, [vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process], [vrarch_stem1, vrarch_stem2])
455
-
456
- with gr.TabItem("Demucs"):
457
- with gr.Row():
458
- demucs_model = gr.Dropdown(
459
- label = "Select the Model",
460
- choices = demucs_models,
461
- interactive = True
462
- )
463
- demucs_output_format = gr.Dropdown(
464
- label = "Select the Output Format",
465
- choices = output_format,
466
- interactive = True
467
- )
468
- with gr.Row():
469
- demucs_shifts = gr.Slider(
470
- minimum = 1,
471
- maximum = 20,
472
- step = 1,
473
- label = "Shifts",
474
- info = "Number of predictions with random shifts, higher = slower but better quality.",
475
- value = 2,
476
- interactive = True
477
- )
478
- demucs_overlap = gr.Slider(
479
- minimum = 0.001,
480
- maximum = 0.999,
481
- step = 0.001,
482
- label = "Overlap",
483
- info = "Amount of overlap between prediction windows.",
484
- value = 0.025,
485
- interactive = True
486
- )
487
- with gr.Row():
488
- demucs_audio = gr.Audio(
489
- label = "Input Audio",
490
- type = "numpy",
491
- interactive = True
492
- )
493
- with gr.Row():
494
- demucs_button = gr.Button("Separate!", variant = "primary")
495
- with gr.Row():
496
- demucs_stem1 = gr.Audio(
497
- show_download_button = True,
498
- interactive = False,
499
- type = "filepath",
500
- label = "Stem 1"
501
- )
502
- demucs_stem2 = gr.Audio(
503
- show_download_button = True,
504
- interactive = False,
505
- type = "filepath",
506
- label = "Stem 2"
507
- )
508
- with gr.Row():
509
- demucs_stem3 = gr.Audio(
510
- show_download_button = True,
511
- interactive = False,
512
- type = "filepath",
513
- label = "Stem 3"
514
- )
515
- demucs_stem4 = gr.Audio(
516
- show_download_button = True,
517
- interactive = False,
518
- type = "filepath",
519
- label = "Stem 4"
520
- )
521
-
522
- demucs_button.click(vrarch_separator, [demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap], [demucs_stem1, demucs_stem2, demucs_stem3, demucs_stem4])
523
-
524
- app.launch()
 
1
import os
import random
import re
import subprocess
import uuid

import gradio as gr
from scipy.io.wavfile import write
6
+
7
# Name offered in the Roformer model dropdown -> checkpoint filename that
# roformer_separator passes to the CLI as --model_filename.
roformer_models = {
    "BS-Roformer-Viperx-1297.ckpt": "model_bs_roformer_ep_317_sdr_12.9755.ckpt",
    "BS-Roformer-Viperx-1296.ckpt": "model_bs_roformer_ep_368_sdr_12.9628.ckpt",
    "BS-Roformer-Viperx-1053.ckpt": "model_bs_roformer_ep_937_sdr_10.5309.ckpt",
    "Mel-Roformer-Viperx-1143.ckpt": "model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt",
}

# Choices for the MDX23C tab's model dropdown.
mdx23c_models = [
    "MDX23C_D1581.ckpt",
    "MDX23C-8KFFT-InstVoc_HQ.ckpt",
    "MDX23C-8KFFT-InstVoc_HQ_2.ckpt",
]

# Choices for the MDX-NET tab's model dropdown. Families that differ only by a
# numeric or stem suffix are generated; the resulting order is significant
# because it is the order shown in the dropdown.
mdxnet_models = [
    "UVR-MDX-NET-Inst_full_292.onnx",
    "UVR-MDX-NET_Inst_187_beta.onnx",
    "UVR-MDX-NET_Inst_82_beta.onnx",
    "UVR-MDX-NET_Inst_90_beta.onnx",
    *(f"UVR-MDX-NET_Main_{n}.onnx" for n in (340, 390, 406, 427, 438)),
    *(f"UVR-MDX-NET-Inst_HQ_{n}.onnx" for n in (1, 2, 3, 4)),
    "UVR_MDXNET_Main.onnx",
    "UVR-MDX-NET-Inst_Main.onnx",
    "UVR_MDXNET_1_9703.onnx",
    "UVR_MDXNET_2_9682.onnx",
    "UVR_MDXNET_3_9662.onnx",
    *(f"UVR-MDX-NET-Inst_{n}.onnx" for n in (1, 2, 3)),
    "UVR_MDXNET_KARA.onnx",
    "UVR_MDXNET_KARA_2.onnx",
    "UVR_MDXNET_9482.onnx",
    "UVR-MDX-NET-Voc_FT.onnx",
    "Kim_Vocal_1.onnx",
    "Kim_Vocal_2.onnx",
    "Kim_Inst.onnx",
    "Reverb_HQ_By_FoxJoy.onnx",
    "UVR-MDX-NET_Crowd_HQ_1.onnx",
    *(
        f"kuielab_{variant}_{stem}.onnx"
        for variant in ("a", "b")
        for stem in ("vocals", "other", "bass", "drums")
    ),
]

# Choices for the VR ARCH tab's model dropdown.
vrarch_models = [
    "1_HP-UVR.pth",
    "2_HP-UVR.pth",
    "3_HP-Vocal-UVR.pth",
    "4_HP-Vocal-UVR.pth",
    "5_HP-Karaoke-UVR.pth",
    "6_HP-Karaoke-UVR.pth",
    "7_HP2-UVR.pth",
    "8_HP2-UVR.pth",
    "9_HP2-UVR.pth",
    "10_SP-UVR-2B-32000-1.pth",
    "11_SP-UVR-2B-32000-2.pth",
    "12_SP-UVR-3B-44100.pth",
    "13_SP-UVR-4B-44100-1.pth",
    "14_SP-UVR-4B-44100-2.pth",
    "15_SP-UVR-MID-44100-1.pth",
    "16_SP-UVR-MID-44100-2.pth",
    "17_HP-Wind_Inst-UVR.pth",
    "UVR-De-Echo-Aggressive.pth",
    "UVR-De-Echo-Normal.pth",
    "UVR-DeEcho-DeReverb.pth",
    "UVR-DeNoise-Lite.pth",
    "UVR-DeNoise.pth",
    "UVR-BVE-4B_SN-44100-1.pth",
    "MGM_HIGHEND_v4.pth",
    "MGM_LOWEND_A_v4.pth",
    "MGM_LOWEND_B_v4.pth",
    "MGM_MAIN_v4.pth",
]

# Choices for the Demucs tab's model dropdown.
demucs_models = ["htdemucs_ft.yaml", "htdemucs.yaml", "hdemucs_mmi.yaml"]

# Output container formats offered on every tab.
output_format = ["wav", "flac", "mp3"]

# Overlap choices for the MDX-NET tab; kept as strings because they are
# interpolated directly into the command line.
mdxnet_overlap_values = ["0.25", "0.5", "0.75", "0.99"]

# Window-size choices for the VR ARCH tab, also kept as strings.
vrarch_window_size_values = ["320", "512", "1024"]
115
+
116
def _run_audio_separator(audio, model_filename, output_format, extra_args, expected_stems):
    """Run the ``audio-separator`` CLI on one clip and return its stem files.

    The clip is written to a uniquely named temporary wav in the working
    directory, the CLI is invoked with an argument list (no shell), and the
    files it leaves in ``./outputs`` for this run are collected.

    Args:
        audio: ``(sample_rate, samples)`` pair as delivered by a
            ``gr.Audio(type="numpy")`` input, or ``None`` when nothing was
            uploaded.
        model_filename: Value passed as ``--model_filename``.
        output_format: Value passed as ``--output_format``.
        extra_args: Architecture-specific command-line options, already
            formatted as individual argument strings.
        expected_stems: Number of output files the caller needs.

    Returns:
        A tuple holding the first ``expected_stems`` output paths, sorted by
        file name so the stem order is the same on every run.

    Raises:
        gr.Error: If an input is missing, the command cannot be started or
            exits with a non-zero status, or fewer than ``expected_stems``
            files were produced.
    """
    if audio is None:
        raise gr.Error("Please provide an input audio file.")
    if not model_filename:
        raise gr.Error("Please select a model.")
    if not output_format:
        raise gr.Error("Please select an output format.")

    sample_rate, samples = audio
    output_dir = "./outputs"
    os.makedirs(output_dir, exist_ok=True)

    # A UUID cannot collide with digits that appear in model names (e.g.
    # "44100", "32000") or with ids of earlier runs, both of which a 5-digit
    # random number matched anywhere in the file name could do.
    run_id = uuid.uuid4().hex
    input_path = f"{run_id}.wav"

    command = [
        "audio-separator",
        input_path,
        "--model_filename",
        str(model_filename),
        f"--output_dir={output_dir}",
        f"--output_format={output_format}",
        "--normalization=0.9",
        *extra_args,
    ]

    try:
        write(input_path, sample_rate, samples)
        # Argument list + no shell: UI-supplied values are never interpreted
        # by a shell.
        completed = subprocess.run(command, check=False)
    except OSError as err:
        raise gr.Error(f"Separation could not be started: {err}") from err
    finally:
        # The temporary input is only needed while the CLI runs.
        if os.path.exists(input_path):
            os.remove(input_path)

    # Assumes audio-separator embeds the input's base name in every output
    # file name (the previous id-based matching relied on the same thing).
    stems = sorted(
        os.path.join(output_dir, file_name)
        for file_name in os.listdir(output_dir)
        if run_id in file_name
    )

    if completed.returncode != 0 or len(stems) < expected_stems:
        raise gr.Error(
            f"Separation failed: expected {expected_stems} output files, "
            f"found {len(stems)} (exit status {completed.returncode})."
        )

    return tuple(stems[:expected_stems])


def roformer_separator(roformer_audio, roformer_model, roformer_output_format, roformer_overlap):
    """Separate a clip with a BS/Mel Roformer model.

    ``roformer_model`` is a key of ``roformer_models``; the mapped checkpoint
    filename is what gets passed to the CLI.

    Returns:
        ``(stem1_file, stem2_file)`` paths inside ``./outputs``.

    Raises:
        gr.Error: If no known model is selected or the separation fails.
    """
    if roformer_model not in roformer_models:
        raise gr.Error("Please select a model.")

    return _run_audio_separator(
        roformer_audio,
        roformer_models[roformer_model],
        roformer_output_format,
        [f"--mdxc_overlap={roformer_overlap}"],
        expected_stems=2,
    )


def mdxc_separator(mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap):
    """Separate a clip with an MDX23C model.

    Returns:
        ``(stem1_file, stem2_file)`` paths inside ``./outputs``.

    Raises:
        gr.Error: If an input is missing or the separation fails.
    """
    return _run_audio_separator(
        mdx23c_audio,
        mdx23c_model,
        mdx23c_output_format,
        [
            f"--mdxc_segment_size={mdx23c_segment_size}",
            f"--mdxc_overlap={mdx23c_overlap}",
        ],
        expected_stems=2,
    )


def mdxnet_separator(mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise):
    """Separate a clip with an MDX-NET model.

    Returns:
        ``(stem1_file, stem2_file)`` paths inside ``./outputs``.

    Raises:
        gr.Error: If an input is missing or the separation fails.
    """
    # MDX-NET models are configured through the --mdx_* options; the
    # --mdxc_* options used here previously belong to the MDX23C/Roformer
    # architecture (see the audio-separator CLI help).
    options = [
        f"--mdx_segment_size={mdxnet_segment_size}",
        f"--mdx_overlap={mdxnet_overlap}",
    ]
    if mdxnet_denoise:
        options.append("--mdx_enable_denoise")

    return _run_audio_separator(
        mdxnet_audio,
        mdxnet_model,
        mdxnet_output_format,
        options,
        expected_stems=2,
    )


def vrarch_separator(vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process):
    """Separate a clip with a VR Arch model.

    Returns:
        ``(stem1_file, stem2_file)`` paths inside ``./outputs``.

    Raises:
        gr.Error: If an input is missing or the separation fails.
    """
    options = [
        f"--vr_window_size={vrarch_window_size}",
        f"--vr_aggression={vrarch_agression}",
    ]
    if vrarch_tta:
        options.append("--vr_enable_tta")
    if vrarch_high_end_process:
        options.append("--vr_high_end_process")

    return _run_audio_separator(
        vrarch_audio,
        vrarch_model,
        vrarch_output_format,
        options,
        expected_stems=2,
    )


def demucs_separator(demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap):
    """Separate a clip with a Demucs model.

    Returns:
        ``(stem1_file, stem2_file, stem3_file, stem4_file)`` paths inside
        ``./outputs``.

    Raises:
        gr.Error: If an input is missing or the separation fails.
    """
    return _run_audio_separator(
        demucs_audio,
        demucs_model,
        demucs_output_format,
        [
            f"--demucs_shifts={demucs_shifts}",
            f"--demucs_overlap={demucs_overlap}",
        ],
        expected_stems=4,
    )
229
+
230
def _model_dropdown(choices):
    """Create the model selector for one tab from the given list of choices."""
    return gr.Dropdown(label="Select the Model", choices=choices, interactive=True)


def _output_format_dropdown():
    """Create the output-format selector, offering the entries of ``output_format``."""
    return gr.Dropdown(label="Select the Output Format", choices=output_format, interactive=True)


def _input_audio():
    """Create the input widget; ``type="numpy"`` is what the separators expect."""
    return gr.Audio(label="Input Audio", type="numpy", interactive=True)


def _stem_output(label):
    """Create a read-only, downloadable player that is fed a stem's file path."""
    return gr.Audio(show_download_button=True, interactive=False, type="filepath", label=label)


# Each tab follows the same layout: model/format row, architecture-specific
# settings row, input audio, the trigger button, and one player per stem.
# The widgets must be created inside the Blocks context to be attached to it,
# which is why the helpers above are only called from within this block.
with gr.Blocks(title="🎵 UVR5 UI 🎵") as app:
    gr.Markdown("<h1> 🎵 UVR5 UI 🎵 </h1>")
    with gr.Tabs():
        with gr.TabItem("BS/Mel Roformer"):
            with gr.Row():
                roformer_model = _model_dropdown(list(roformer_models.keys()))
                roformer_output_format = _output_format_dropdown()
            with gr.Row():
                roformer_overlap = gr.Slider(
                    minimum=2,
                    maximum=4,
                    step=1,
                    label="Overlap",
                    info="Amount of overlap between prediction windows.",
                    value=4,
                    interactive=True,
                )
            with gr.Row():
                roformer_audio = _input_audio()
            with gr.Row():
                roformer_button = gr.Button("Separate!", variant="primary")
            with gr.Row():
                roformer_stem1 = _stem_output("Stem 1")
                roformer_stem2 = _stem_output("Stem 2")

            roformer_button.click(
                roformer_separator,
                [roformer_audio, roformer_model, roformer_output_format, roformer_overlap],
                [roformer_stem1, roformer_stem2],
            )

        with gr.TabItem("MDX23C"):
            with gr.Row():
                mdx23c_model = _model_dropdown(mdx23c_models)
                mdx23c_output_format = _output_format_dropdown()
            with gr.Row():
                mdx23c_segment_size = gr.Slider(
                    minimum=32,
                    maximum=4000,
                    step=32,
                    label="Segment Size",
                    info="Larger consumes more resources, but may give better results.",
                    value=256,
                    interactive=True,
                )
                mdx23c_overlap = gr.Slider(
                    minimum=2,
                    maximum=50,
                    step=1,
                    label="Overlap",
                    info="Amount of overlap between prediction windows.",
                    value=8,
                    interactive=True,
                )
            with gr.Row():
                mdx23c_audio = _input_audio()
            with gr.Row():
                mdx23c_button = gr.Button("Separate!", variant="primary")
            with gr.Row():
                mdx23c_stem1 = _stem_output("Stem 1")
                mdx23c_stem2 = _stem_output("Stem 2")

            mdx23c_button.click(
                mdxc_separator,
                [mdx23c_audio, mdx23c_model, mdx23c_output_format, mdx23c_segment_size, mdx23c_overlap],
                [mdx23c_stem1, mdx23c_stem2],
            )

        with gr.TabItem("MDX-NET"):
            with gr.Row():
                mdxnet_model = _model_dropdown(mdxnet_models)
                mdxnet_output_format = _output_format_dropdown()
            with gr.Row():
                mdxnet_segment_size = gr.Slider(
                    minimum=32,
                    maximum=4000,
                    step=32,
                    label="Segment Size",
                    info="Larger consumes more resources, but may give better results.",
                    value=256,
                    interactive=True,
                )
                mdxnet_overlap = gr.Dropdown(
                    label="Overlap",
                    choices=mdxnet_overlap_values,
                    value=mdxnet_overlap_values[0],
                    interactive=True,
                )
                mdxnet_denoise = gr.Checkbox(
                    label="Denoise",
                    info="Enable denoising during separation.",
                    value=True,
                    interactive=True,
                )
            with gr.Row():
                mdxnet_audio = _input_audio()
            with gr.Row():
                mdxnet_button = gr.Button("Separate", variant="primary")
            with gr.Row():
                mdxnet_stem1 = _stem_output("Stem 1")
                mdxnet_stem2 = _stem_output("Stem 2")

            mdxnet_button.click(
                mdxnet_separator,
                [mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise],
                [mdxnet_stem1, mdxnet_stem2],
            )

        with gr.TabItem("VR ARCH"):
            with gr.Row():
                vrarch_model = _model_dropdown(vrarch_models)
                vrarch_output_format = _output_format_dropdown()
            with gr.Row():
                vrarch_window_size = gr.Dropdown(
                    label="Window Size",
                    choices=vrarch_window_size_values,
                    value=vrarch_window_size_values[0],
                    interactive=True,
                )
                vrarch_agression = gr.Slider(
                    minimum=1,
                    maximum=50,
                    step=1,
                    label="Agression",
                    info="Intensity of primary stem extraction.",
                    value=5,
                    interactive=True,
                )
                vrarch_tta = gr.Checkbox(
                    label="TTA",
                    info="Enable Test-Time-Augmentation; slow but improves quality.",
                    value=True,
                    interactive=True,
                )
                vrarch_high_end_process = gr.Checkbox(
                    label="High End Process",
                    info="Mirror the missing frequency range of the output.",
                    value=False,
                    interactive=True,
                )
            with gr.Row():
                vrarch_audio = _input_audio()
            with gr.Row():
                vrarch_button = gr.Button("Separate", variant="primary")
            with gr.Row():
                vrarch_stem1 = _stem_output("Stem 1")
                vrarch_stem2 = _stem_output("Stem 2")

            vrarch_button.click(
                vrarch_separator,
                [vrarch_audio, vrarch_model, vrarch_output_format, vrarch_window_size, vrarch_agression, vrarch_tta, vrarch_high_end_process],
                [vrarch_stem1, vrarch_stem2],
            )

        with gr.TabItem("Demucs"):
            with gr.Row():
                demucs_model = _model_dropdown(demucs_models)
                demucs_output_format = _output_format_dropdown()
            with gr.Row():
                demucs_shifts = gr.Slider(
                    minimum=1,
                    maximum=20,
                    step=1,
                    label="Shifts",
                    info="Number of predictions with random shifts, higher = slower but better quality.",
                    value=2,
                    interactive=True,
                )
                demucs_overlap = gr.Slider(
                    minimum=0.001,
                    maximum=0.999,
                    step=0.001,
                    label="Overlap",
                    info="Amount of overlap between prediction windows.",
                    value=0.025,
                    interactive=True,
                )
            with gr.Row():
                demucs_audio = _input_audio()
            with gr.Row():
                demucs_button = gr.Button("Separate!", variant="primary")
            with gr.Row():
                demucs_stem1 = _stem_output("Stem 1")
                demucs_stem2 = _stem_output("Stem 2")
            with gr.Row():
                demucs_stem3 = _stem_output("Stem 3")
                demucs_stem4 = _stem_output("Stem 4")

            # Must be demucs_separator: it takes exactly these five inputs and
            # returns four paths. The button was previously wired to
            # vrarch_separator, which takes seven parameters and returns two.
            demucs_button.click(
                demucs_separator,
                [demucs_audio, demucs_model, demucs_output_format, demucs_shifts, demucs_overlap],
                [demucs_stem1, demucs_stem2, demucs_stem3, demucs_stem4],
            )

app.launch(share=True)