Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -153,6 +153,10 @@ Demo for the text-based editing method introduced in:
|
|
153 |
<a href="https://arxiv.org/abs/2402.10009" style="text-decoration: underline;" target="_blank"> Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion </a>
|
154 |
</p>
|
155 |
<p style="font-size:larger">
|
|
|
|
|
|
|
|
|
156 |
<b>Instructions:</b><br>
|
157 |
Provide an input audio and a target prompt to edit the audio. <br>
|
158 |
T<sub>start</sub> is used to control the tradeoff between fidelity to the original signal and text-adherence.
|
@@ -169,7 +173,6 @@ For faster inference without waiting in queue, you may duplicate the space and u
|
|
169 |
<a href="https://huggingface.co/spaces/hilamanor/audioEditing?duplicate=true">
|
170 |
<img style="margin-top: 0em; margin-bottom: 0em; display:inline" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" ></a>
|
171 |
</p>
|
172 |
-
|
173 |
"""
|
174 |
|
175 |
with gr.Blocks(css='style.css') as demo:
|
@@ -187,16 +190,14 @@ with gr.Blocks(css='style.css') as demo:
|
|
187 |
do_inversion = gr.State(value=True) # To save some runtime when editing the same thing over and over
|
188 |
|
189 |
with gr.Row():
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
input_audio = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input Audio",
|
194 |
-
interactive=True, scale=1)
|
195 |
|
196 |
-
|
197 |
-
tar_prompt = gr.Textbox(label="
|
198 |
lines=2, interactive=True)
|
199 |
-
|
200 |
|
201 |
with gr.Row():
|
202 |
with gr.Column():
|
@@ -204,13 +205,16 @@ with gr.Blocks(css='style.css') as demo:
|
|
204 |
|
205 |
with gr.Row():
|
206 |
t_start = gr.Slider(minimum=30, maximum=160, value=110, step=1, label="T-start", interactive=True, scale=3,
|
207 |
-
info="Higher T-start -> stronger edit. Lower T-start ->
|
208 |
model_id = gr.Dropdown(label="AudioLDM2 Version", choices=["cvssp/audioldm2",
|
209 |
"cvssp/audioldm2-large",
|
210 |
"cvssp/audioldm2-music"],
|
211 |
-
info="Choose a checkpoint suitable for your intended audio and edit
|
212 |
value="cvssp/audioldm2-music", interactive=True, type="value", scale=2)
|
213 |
-
with gr.Accordion("
|
|
|
|
|
|
|
214 |
|
215 |
with gr.Row():
|
216 |
cfg_scale_src = gr.Number(value=3, minimum=0.5, maximum=25, precision=None,
|
|
|
153 |
<a href="https://arxiv.org/abs/2402.10009" style="text-decoration: underline;" target="_blank"> Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion </a>
|
154 |
</p>
|
155 |
<p style="font-size:larger">
|
156 |
+
|
157 |
+
"""
|
158 |
+
|
159 |
+
help = """
|
160 |
<b>Instructions:</b><br>
|
161 |
Provide an input audio and a target prompt to edit the audio. <br>
|
162 |
T<sub>start</sub> is used to control the tradeoff between fidelity to the original signal and text-adherence.
|
|
|
173 |
<a href="https://huggingface.co/spaces/hilamanor/audioEditing?duplicate=true">
|
174 |
<img style="margin-top: 0em; margin-bottom: 0em; display:inline" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" ></a>
|
175 |
</p>
|
|
|
176 |
"""
|
177 |
|
178 |
with gr.Blocks(css='style.css') as demo:
|
|
|
190 |
do_inversion = gr.State(value=True) # To save some runtime when editing the same thing over and over
|
191 |
|
192 |
with gr.Row():
|
193 |
+
input_audio = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input Audio",
|
194 |
+
interactive=True, scale=1)
|
195 |
+
output_audio = gr.Audio(label="Edited Audio", interactive=False, scale=1)
|
|
|
|
|
196 |
|
197 |
+
with gr.Row():
|
198 |
+
tar_prompt = gr.Textbox(label="Prompt", info="Describe your desired edited output", placeholder="a recording of a happy upbeat arcade game soundtrack",
|
199 |
lines=2, interactive=True)
|
200 |
+
|
201 |
|
202 |
with gr.Row():
|
203 |
with gr.Column():
|
|
|
205 |
|
206 |
with gr.Row():
|
207 |
t_start = gr.Slider(minimum=30, maximum=160, value=110, step=1, label="T-start", interactive=True, scale=3,
|
208 |
+
info="Higher T-start -> stronger edit. Lower T-start -> closer to original audio")
|
209 |
model_id = gr.Dropdown(label="AudioLDM2 Version", choices=["cvssp/audioldm2",
|
210 |
"cvssp/audioldm2-large",
|
211 |
"cvssp/audioldm2-music"],
|
212 |
+
info="Choose a checkpoint suitable for your intended audio and edit",
|
213 |
value="cvssp/audioldm2-music", interactive=True, type="value", scale=2)
|
214 |
+
with gr.Accordion("More Options", open=False):
|
215 |
+
with gr.Row():
|
216 |
+
src_prompt = gr.Textbox(label="Source Prompt", lines=2, interactive=True, info= "Optional: Describe the original audio input",
|
217 |
+
placeholder="A recording of a happy upbeat classical music piece",)
|
218 |
|
219 |
with gr.Row():
|
220 |
cfg_scale_src = gr.Number(value=3, minimum=0.5, maximum=25, precision=None,
|