theNeofr committed on
Commit 27fc253 · verified · 1 Parent(s): d9a79e7

Update app.py

Files changed (1)
  1. app.py +16 -13
app.py CHANGED
@@ -401,7 +401,7 @@ def show_hide_params(param):
     return gr.update(visible=param)
 
 with gr.Blocks(
-    title="🎵 Audio-Separator by Politrees 🎵",
+    title="🎵 Audio-Separator by HFD 🎵",
     css="footer{display:none !important}",
 ) as app:
     gr.HTML("<h1><center> 🎵 Audio-Separator HF Demo 🎵 </center></h1>")
@@ -414,9 +414,10 @@ with gr.Blocks(
         with gr.Accordion("Advanced settings", open=False):
             with gr.Column(variant='panel'):
                 with gr.Group():
-                    roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
                     with gr.Row():
+                        roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
                         roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", visible=False)
+                    with gr.Row():
                         roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Lower is better but slower.")
                         roformer_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
             with gr.Column(variant='panel'):
@@ -424,7 +425,7 @@ with gr.Blocks(
                     with gr.Row():
                         roformer_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
                         roformer_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
-                        roformer_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
+                        roformer_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
         with gr.Row():
             roformer_audio = gr.Audio(label="Input Audio", type="filepath")
         with gr.Row():
@@ -441,9 +442,10 @@ with gr.Blocks(
         with gr.Accordion("Advanced settings", open=False):
             with gr.Column(variant='panel'):
                 with gr.Group():
-                    mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
                     with gr.Row():
+                        mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
                         mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", visible=False)
+                    with gr.Row():
                         mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
                         mdx23c_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
             with gr.Column(variant='panel'):
@@ -451,7 +453,7 @@ with gr.Blocks(
                     with gr.Row():
                         mdx23c_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
                         mdx23c_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
-                        mdx23c_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
+                        mdx23c_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
         with gr.Row():
             mdx23c_audio = gr.Audio(label="Input Audio", type="filepath")
         with gr.Row():
@@ -468,9 +470,10 @@ with gr.Blocks(
         with gr.Accordion("Advanced settings", open=False):
             with gr.Column(variant='panel'):
                 with gr.Group():
-                    mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising after separation.")
                     with gr.Row():
+                        mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising after separation.")
                         mdx_hop_length = gr.Slider(minimum=32, maximum=2048, step=32, value=1024, label="Hop Length", info="Usually called stride in neural networks; only change if you know what you're doing.")
+                    with gr.Row():
                         mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
                         mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
             with gr.Column(variant='panel'):
@@ -478,7 +481,7 @@ with gr.Blocks(
                     with gr.Row():
                         mdx_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
                         mdx_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
-                        mdx_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
+                        mdx_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
         with gr.Row():
             mdx_audio = gr.Audio(label="Input Audio", type="filepath")
         with gr.Row():
@@ -498,17 +501,17 @@ with gr.Blocks(
                     with gr.Row():
                         vr_post_process = gr.Checkbox(value=False, label="Post Process", info="Identify leftover artifacts within vocal output; may improve separation for some songs.")
                         vr_tta = gr.Checkbox(value=False, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
-                        vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
+                        vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
                     with gr.Row():
                         vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.", visible=False)
                         vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size", info="Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality.")
-                        vr_aggression = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Agression", info="Intensity of primary stem extraction.")
+                        vr_aggression = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Agression", info="Intensity of primary stem extraction.")
             with gr.Column(variant='panel'):
                 with gr.Group():
                     with gr.Row():
                         vr_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
                         vr_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
-                        vr_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
+                        vr_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
         with gr.Row():
             vr_audio = gr.Audio(label="Input Audio", type="filepath")
         with gr.Row():
@@ -525,11 +528,12 @@ with gr.Blocks(
         with gr.Accordion("Advanced settings", open=False):
             with gr.Column(variant='panel'):
                 with gr.Group():
-                    demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing", info="Enable segment-wise processing.")
+                    with gr.Row():
+                        demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing", info="Enable segment-wise processing.")
                     with gr.Row():
                         demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=40, label="Segment Size", info="Size of segments into which the audio is split. Higher = slower but better quality.")
                         demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Overlap between prediction windows. Higher = slower but better quality.")
-                        demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.")
+                        demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.")
             with gr.Column(variant='panel'):
                 with gr.Group():
                     with gr.Row():
@@ -595,7 +599,6 @@ with gr.Blocks(
         """
         This Space created by **[Politrees](https://github.com/Bebra777228) forked by [NeoFr](https://github.com/TheNeodev)**.
         * python-audio-separator by **[beveradb](https://github.com/beveradb)**.
-        * Thanks to **[Hev832](https://huggingface.co/Hev832)** for the help with the code.
         """
     )
 
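
Note for readers of the diff: the segment-size sliders are created with visible=False and are revealed by the override checkboxes through show_hide_params, which simply returns gr.update(visible=param). Below is a minimal, self-contained sketch of that pattern; the .change() wiring, the demo names, and the launch call are illustrative assumptions and are not part of this commit.

import gradio as gr

def show_hide_params(param):
    # Same helper as in the diff: toggle a component's visibility from a boolean.
    return gr.update(visible=param)

with gr.Blocks(title="visibility-toggle sketch") as demo:
    # Checkbox controlling whether the model's default segment size is overridden.
    override = gr.Checkbox(value=False, label="Override segment size")
    # Slider starts hidden, mirroring the visible=False sliders in app.py.
    seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256,
                         label="Segment Size", visible=False)
    # Assumed wiring (not shown in this diff): ticking the box reveals the slider.
    override.change(show_hide_params, inputs=override, outputs=seg_size)

if __name__ == "__main__":
    demo.launch()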