theNeofr committed on
Commit 27fc253 · verified · 1 Parent(s): d9a79e7

Update app.py

Files changed (1)
  1. app.py +16 -13
app.py CHANGED
@@ -401,7 +401,7 @@ def show_hide_params(param):
     return gr.update(visible=param)
 
 with gr.Blocks(
-    title="🎵 Audio-Separator by Politrees 🎵",
+    title="🎵 Audio-Separator by HFD 🎵",
     css="footer{display:none !important}",
 ) as app:
     gr.HTML("<h1><center> 🎵 Audio-Separator HF Demo 🎵 </center></h1>")
@@ -414,9 +414,10 @@ with gr.Blocks(
         with gr.Accordion("Advanced settings", open=False):
             with gr.Column(variant='panel'):
                 with gr.Group():
-                    roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
                     with gr.Row():
+                        roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
                         roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", visible=False)
+                    with gr.Row():
                         roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Lower is better but slower.")
                         roformer_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
             with gr.Column(variant='panel'):
@@ -424,7 +425,7 @@ with gr.Blocks(
                     with gr.Row():
                         roformer_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
                         roformer_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
-                        roformer_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
+                        roformer_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
         with gr.Row():
             roformer_audio = gr.Audio(label="Input Audio", type="filepath")
         with gr.Row():
@@ -441,9 +442,10 @@ with gr.Blocks(
         with gr.Accordion("Advanced settings", open=False):
             with gr.Column(variant='panel'):
                 with gr.Group():
-                    mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
                     with gr.Row():
+                        mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
                         mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", visible=False)
+                    with gr.Row():
                         mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
                         mdx23c_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
             with gr.Column(variant='panel'):
@@ -451,7 +453,7 @@ with gr.Blocks(
                     with gr.Row():
                         mdx23c_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
                         mdx23c_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
-                        mdx23c_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
+                        mdx23c_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
         with gr.Row():
             mdx23c_audio = gr.Audio(label="Input Audio", type="filepath")
         with gr.Row():
@@ -468,9 +470,10 @@ with gr.Blocks(
         with gr.Accordion("Advanced settings", open=False):
             with gr.Column(variant='panel'):
                 with gr.Group():
-                    mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising after separation.")
                     with gr.Row():
+                        mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising after separation.")
                         mdx_hop_length = gr.Slider(minimum=32, maximum=2048, step=32, value=1024, label="Hop Length", info="Usually called stride in neural networks; only change if you know what you're doing.")
+                    with gr.Row():
                         mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
                         mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
             with gr.Column(variant='panel'):
@@ -478,7 +481,7 @@ with gr.Blocks(
                     with gr.Row():
                         mdx_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
                         mdx_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
-                        mdx_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
+                        mdx_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
         with gr.Row():
             mdx_audio = gr.Audio(label="Input Audio", type="filepath")
         with gr.Row():
@@ -498,17 +501,17 @@ with gr.Blocks(
                     with gr.Row():
                         vr_post_process = gr.Checkbox(value=False, label="Post Process", info="Identify leftover artifacts within vocal output; may improve separation for some songs.")
                         vr_tta = gr.Checkbox(value=False, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
-                        vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
+                        vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
                     with gr.Row():
                         vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.", visible=False)
                         vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size", info="Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality.")
-                        vr_aggression = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Agression", info="Intensity of primary stem extraction.")
+                        vr_aggression = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Agression", info="Intensity of primary stem extraction.")
             with gr.Column(variant='panel'):
                 with gr.Group():
                     with gr.Row():
                         vr_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
                         vr_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
-                        vr_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
+                        vr_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
         with gr.Row():
             vr_audio = gr.Audio(label="Input Audio", type="filepath")
         with gr.Row():
@@ -525,11 +528,12 @@ with gr.Blocks(
         with gr.Accordion("Advanced settings", open=False):
             with gr.Column(variant='panel'):
                 with gr.Group():
-                    demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing", info="Enable segment-wise processing.")
+                    with gr.Row():
+                        demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing", info="Enable segment-wise processing.")
                     with gr.Row():
                         demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=40, label="Segment Size", info="Size of segments into which the audio is split. Higher = slower but better quality.")
                         demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Overlap between prediction windows. Higher = slower but better quality.")
-                        demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.")
+                        demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.")
             with gr.Column(variant='panel'):
                 with gr.Group():
                     with gr.Row():
@@ -595,7 +599,6 @@ with gr.Blocks(
         """
         This Space created by **[Politrees](https://github.com/Bebra777228) forked by [NeoFr](https://github.com/TheNeodev)**.
         * python-audio-separator by **[beveradb](https://github.com/beveradb)**.
-        * Thanks to **[Hev832](https://huggingface.co/Hev832)** for the help with the code.
         """
     )
 
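
Note for readers of the diff: the segment-size sliders are created with visible=False and are revealed by the override checkboxes through show_hide_params, which simply returns gr.update(visible=param). Below is a minimal, self-contained sketch of that pattern; the .change() wiring, the demo names, and the launch call are illustrative assumptions and are not part of this commit.

import gradio as gr

def show_hide_params(param):
    # Same helper as in the diff: toggle a component's visibility from a boolean.
    return gr.update(visible=param)

with gr.Blocks(title="visibility-toggle sketch") as demo:
    # Checkbox controlling whether the model's default segment size is overridden.
    override = gr.Checkbox(value=False, label="Override segment size")
    # Slider starts hidden, mirroring the visible=False sliders in app.py.
    seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256,
                         label="Segment Size", visible=False)
    # Assumed wiring (not shown in this diff): ticking the box reveals the slider.
    override.change(show_hide_params, inputs=override, outputs=seg_size)

if __name__ == "__main__":
    demo.launch()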