fix temp files cache and move to ZeroGPU

app.py CHANGED
@@ -1,12 +1,18 @@
+# Will be fixed soon, but meanwhile:
+import os
+if os.getenv('SPACES_ZERO_GPU') == "true":
+    os.environ['SPACES_ZERO_GPU'] = "1"
+
 import gradio as gr
 import random
 import torch
-import os
 from torch import inference_mode
-from tempfile import NamedTemporaryFile
+# from tempfile import NamedTemporaryFile
+from typing import Optional
 import numpy as np
 from models import load_model
 import utils
+import spaces
 from inversion_utils import inversion_forward_process, inversion_reverse_process
 
 
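Note: the four-line shim at the top works around a then-current Spaces bug, which the diff's own comment flags as temporary: the runtime exported SPACES_ZERO_GPU="true" while the spaces package expected "1", so the value is normalized before `import spaces` runs. A minimal sketch of the ZeroGPU pattern the rest of the diff adopts (the toy model and `infer` are illustrative, not repo code):

import spaces
import torch

model = torch.nn.Linear(8, 8)  # stand-in for the real AudioLDM2 pipeline

@spaces.GPU  # a GPU is attached only while this function executes
def infer(x: torch.Tensor) -> torch.Tensor:
    model.to('cuda')                  # weights move once the GPU exists
    return model(x.to('cuda')).cpu()  # hand CPU tensors back to the caller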
@@ -31,7 +37,7 @@ def randomize_seed_fn(seed, randomize_seed):
 
 
 def invert(ldm_stable, x0, prompt_src, num_diffusion_steps, cfg_scale_src):  # , ldm_stable):
-    ldm_stable.model.scheduler.set_timesteps(num_diffusion_steps, device=device)
+    # ldm_stable.model.scheduler.set_timesteps(num_diffusion_steps, device=device)
 
     with inference_mode():
         w0 = ldm_stable.vae_encode(x0)
@@ -67,21 +73,22 @@ def sample(ldm_stable, zs, wts, steps, prompt_tar, tstart, cfg_scale_tar): # ,
 
     return (16000, audio.squeeze().cpu().numpy())
 
-
-def edit(cache_dir,
-         input_audio,
-         model_id: str,
-         do_inversion: bool,
-         wtszs_file: str,
-         saved_inv_model: str,
-         source_prompt="",
-         target_prompt="",
-         steps=200,
-         cfg_scale_src=3.5,
-         cfg_scale_tar=12,
-         t_start=45,
-         randomize_seed=True):
-
+@spaces.GPU
+def edit(
+        # cache_dir,
+        input_audio,
+        model_id: str,
+        do_inversion: bool,
+        # wtszs_file: str,
+        wts: Optional[torch.Tensor], zs: Optional[torch.Tensor],
+        saved_inv_model: str,
+        source_prompt="",
+        target_prompt="",
+        steps=200,
+        cfg_scale_src=3.5,
+        cfg_scale_tar=12,
+        t_start=45,
+        randomize_seed=True):
 
     print(model_id)
     if model_id == LDM2:
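Note: this signature change is the core of the commit. Instead of receiving `wtszs_file` (a path to a .pth in Gradio's temp cache under `cache_dir`), `edit` now takes the inversion latents `wts` and `zs` as tensors and returns updated ones, with `gr.State` carrying them between calls; they are `Optional` because a session's state starts as None. A minimal sketch of that round-trip, with hypothetical names:

import gradio as gr

def step(history):                      # `history` plays the role of wts/zs
    history = (history or 0) + 1        # None on a session's first call
    return f"call #{history}", history  # the returned value is stored back

with gr.Blocks() as demo:
    history = gr.State()                # per-session, in-memory, no temp files
    out = gr.Textbox()
    gr.Button("Run").click(step, inputs=[history], outputs=[out, history])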
@@ -89,7 +96,9 @@ def edit(cache_dir,
     elif model_id == LDM2_LARGE:
         ldm_stable = ldm2_large
     else: # MUSIC
         ldm_stable = ldm2_music
+
+    ldm_stable.model.scheduler.set_timesteps(steps, device=device)
 
     # If the inversion was done for a different model, we need to re-run the inversion
     if not do_inversion and (saved_inv_model is None or saved_inv_model != model_id):
@@ -99,29 +108,35 @@ def edit(cache_dir,
         raise gr.Error('Input audio missing!')
     x0 = utils.load_audio(input_audio, ldm_stable.get_fn_STFT(), device=device)
 
-    if not (do_inversion or randomize_seed):
-        if not os.path.exists(wtszs_file):
-            do_inversion = True
+    # if not (do_inversion or randomize_seed):
+    #     if not os.path.exists(wtszs_file):
+    #         do_inversion = True
             # Too much time has passed
+    if wts is None or zs is None:
+        do_inversion = True
 
     if do_inversion or randomize_seed:  # always re-run inversion
         zs_tensor, wts_tensor = invert(ldm_stable=ldm_stable, x0=x0, prompt_src=source_prompt,
                                        num_diffusion_steps=steps,
                                        cfg_scale_src=cfg_scale_src)
-        f = NamedTemporaryFile("wb", dir=cache_dir, suffix=".pth", delete=False)
-        torch.save({'wts': wts_tensor, 'zs': zs_tensor}, f.name)
-        wtszs_file = f.name
+        # f = NamedTemporaryFile("wb", dir=cache_dir, suffix=".pth", delete=False)
+        # torch.save({'wts': wts_tensor, 'zs': zs_tensor}, f.name)
+        # wtszs_file = f.name
         # wtszs_file = gr.State(value=f.name)
         # wts = gr.State(value=wts_tensor)
+        wts = wts_tensor
+        zs = zs_tensor
         # zs = gr.State(value=zs_tensor)
         # demo.move_resource_to_block_cache(f.name)
         saved_inv_model = model_id
         do_inversion = False
     else:
-        wtszs = torch.load(wtszs_file, map_location=device)
-        # wtszs = torch.load(wtszs_file.f, map_location=device)
-        wts_tensor = wtszs['wts']
-        zs_tensor = wtszs['zs']
+        # wtszs = torch.load(wtszs_file, map_location=device)
+        # # wtszs = torch.load(wtszs_file.f, map_location=device)
+        # wts_tensor = wtszs['wts']
+        # zs_tensor = wtszs['zs']
+        wts_tensor = wts.to(device)
+        zs_tensor = zs.to(device)
 
     # make sure t_start is in the right limit
     # t_start = change_tstart_range(t_start, steps)
@@ -129,7 +144,8 @@ def edit(cache_dir,
     output = sample(ldm_stable, zs_tensor, wts_tensor, steps, prompt_tar=target_prompt,
                     tstart=int(t_start / 100 * steps), cfg_scale_tar=cfg_scale_tar)
 
-    return output, wtszs_file, saved_inv_model, do_inversion
+    return output, wts.cpu(), zs.cpu(), saved_inv_model, do_inversion
+    # return output, wtszs_file, saved_inv_model, do_inversion
 
 
 def get_example():
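Note: the new return ships `wts.cpu()` / `zs.cpu()` back into `gr.State`, pairing with the `wts.to(device)` / `zs.to(device)` in the reuse branch above. A plausible reading of the convention on ZeroGPU, where the device is only attached inside the `@spaces.GPU` call (illustrative helpers, not repo code):

import torch

def park(wts: torch.Tensor, zs: torch.Tensor):
    # CPU copies are safe to keep in gr.State after the GPU is released
    return wts.cpu(), zs.cpu()

def rehydrate(wts: torch.Tensor, zs: torch.Tensor, device):
    # move cached latents onto whichever GPU the next call is granted
    return wts.to(device), zs.to(device)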
@@ -208,7 +224,7 @@ change <code style="display:inline; background-color: lightgrey; ">duration = mi
 
 """
 
-with gr.Blocks(css='style.css', delete_cache=(3600, 3600)) as demo:
+with gr.Blocks(css='style.css') as demo: #, delete_cache=(3600, 3600)) as demo:
     def reset_do_inversion(do_inversion_user, do_inversion):
         # do_inversion = gr.State(value=True)
         do_inversion = True
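Note: with nothing written to the temp cache anymore, `delete_cache` is commented out rather than tuned. Per the Gradio docs the tuple is (frequency, age) in seconds: every 3600 s, delete this app's cached temp files older than 3600 s — exactly the sweep that could remove a session's saved wtszs .pth mid-session (the "Too much time has passed" case handled above):

import gradio as gr

# (frequency_s, age_s): hourly sweep of Blocks temp files older than an hour;
# only meaningful while results are cached on disk
with gr.Blocks(css='style.css', delete_cache=(3600, 3600)) as demo:
    ...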
@@ -219,18 +235,18 @@ with gr.Blocks(css='style.css', delete_cache=(3600, 3600)) as demo:
     def clear_do_inversion_user(do_inversion_user):
         do_inversion_user = False
         return do_inversion_user
+
     def post_match_do_inversion(do_inversion_user, do_inversion):
         if do_inversion_user:
             do_inversion = True
             do_inversion_user = False
         return do_inversion_user, do_inversion
 
-
     gr.HTML(intro)
-    # wts = gr.State()
-    # zs = gr.State()
+    wts = gr.State()
+    zs = gr.State()
     wtszs = gr.State()
-    cache_dir = gr.State(demo.GRADIO_CACHE)
+    # cache_dir = gr.State(demo.GRADIO_CACHE)
     saved_inv_model = gr.State()
     # current_loaded_model = gr.State(value="cvssp/audioldm2-music")
     # ldm_stable = load_model("cvssp/audioldm2-music", device, 200)
@@ -293,13 +309,13 @@ with gr.Blocks(css='style.css', delete_cache=(3600, 3600)) as demo:
                   outputs=[seed], queue=False).then(
         fn=clear_do_inversion_user, inputs=[do_inversion_user], outputs=[do_inversion_user]).then(
         fn=edit,
-        inputs=[cache_dir,
+        inputs=[#cache_dir,
                 input_audio,
                 model_id,
                 do_inversion,
                 # current_loaded_model, ldm_stable,
-                # wts, zs,
-                wtszs,
+                wts, zs,
+                # wtszs,
                 saved_inv_model,
                 src_prompt,
                 tar_prompt,
@@ -309,7 +325,7 @@ with gr.Blocks(css='style.css', delete_cache=(3600, 3600)) as demo:
                 t_start,
                 randomize_seed
                 ],
-        outputs=[output_audio, wtszs,
+        outputs=[output_audio, wts, zs, # wtszs,
                 saved_inv_model, do_inversion] # , current_loaded_model, ldm_stable],
     ).then(post_match_do_inversion, inputs=[do_inversion_user, do_inversion], outputs=[do_inversion_user, do_inversion]
     ).then(lambda x: (demo.temp_file_sets.append(set([str(gr.utils.abspath(x))])) if type(x) is str else None),
@@ -332,4 +348,4 @@ with gr.Blocks(css='style.css', delete_cache=(3600, 3600)) as demo:
     )
 
 demo.queue()
-demo.launch()
+demo.launch(state_session_capacity=15)
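Note: `state_session_capacity` caps how many sessions' `gr.State` values Gradio keeps in memory, evicting the oldest once the cap is exceeded. A small cap matters here because each session may now hold two full diffusion-latent tensors in RAM instead of a file path.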