Commit 3dbb2cf
Parent(s): b5ecd5f
Joseph Catrambone committed:

Prevent models from forcing tensors to CUDA. Increase the default max_faces from 1 to 5.

Files changed:
- app.py +1 -1
- cldm/ddim_hacked.py +4 -3
- ldm/models/diffusion/ddim.py +4 -3
- ldm/models/diffusion/dpm_solver/sampler.py +4 -3
- ldm/models/diffusion/plms.py +4 -3
- ldm/modules/encoders/modules.py +18 -5
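For context on the first half of the commit message: on a machine without a usable GPU, an unconditional tensor.to("cuda") raises an error, so a CPU-only Space dies as soon as a sampler or text encoder is constructed. A minimal illustration in generic PyTorch (not code from this repository; the exact exception text depends on the installed torch build):

import torch

x = torch.zeros(3)

# Hard-coding the device fails on CPU-only machines:
# x = x.to(torch.device("cuda"))   # raises if no CUDA device/build is available

# Guarding on availability, or resolving a default device once, keeps the same
# code working on both CPU and GPU hosts.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
x = x.to(device)
print(x.device)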
app.py CHANGED
@@ -86,7 +86,7 @@ with block:
             run_button = gr.Button(label="Run")
             with gr.Accordion("Advanced options", open=False):
                 num_samples = gr.Slider(label="Images", minimum=1, maximum=12, value=1, step=1)
-                max_faces = gr.Slider(label="Max Faces", minimum=1, maximum=10, value=1, step=1)
+                max_faces = gr.Slider(label="Max Faces", minimum=1, maximum=10, value=5, step=1)
                 min_confidence = gr.Slider(label="Min Confidence", minimum=0.01, maximum=1.0, value=0.5, step=0.01)
                 strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
                 guess_mode = gr.Checkbox(label='Guess Mode', value=False)
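As a usage sketch, this is roughly how such sliders feed the click handler in a Gradio Blocks app; the process function and result textbox below are hypothetical stand-ins, not taken from this Space's app.py. The new default of 5 is simply the value the handler receives when the user never touches the slider.

import gradio as gr

# Hypothetical handler standing in for the Space's real pipeline function.
def process(num_samples, max_faces, min_confidence):
    return f"{num_samples} image(s), up to {max_faces} face(s), min confidence {min_confidence}"

with gr.Blocks() as block:
    run_button = gr.Button(value="Run")
    with gr.Accordion("Advanced options", open=False):
        num_samples = gr.Slider(label="Images", minimum=1, maximum=12, value=1, step=1)
        max_faces = gr.Slider(label="Max Faces", minimum=1, maximum=10, value=5, step=1)
        min_confidence = gr.Slider(label="Min Confidence", minimum=0.01, maximum=1.0, value=0.5, step=0.01)
    result = gr.Textbox(label="Result")
    # Gradio passes each input component's current value positionally to the handler.
    run_button.click(fn=process, inputs=[num_samples, max_faces, min_confidence], outputs=[result])

block.launch()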
cldm/ddim_hacked.py CHANGED
@@ -15,9 +15,10 @@ class DDIMSampler(object):
         self.schedule = schedule
 
     def register_buffer(self, name, attr):
-        if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+        # Do not force attr to CUDA device by default. It may not exist.
+        #if type(attr) == torch.Tensor:
+        #    if attr.device != torch.device("cuda"):
+        #        attr = attr.to(torch.device("cuda"))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
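The change above simply comments out the transfer, leaving each buffer on whatever device the caller already put it on. A device-agnostic alternative, shown here as a sketch rather than as what this commit does, would resolve the target device at call time:

import torch

def register_buffer(self, name, attr):
    # Move tensors to CUDA only when a GPU is actually present; otherwise leave
    # them on the CPU so the sampler also works on CPU-only machines.
    if isinstance(attr, torch.Tensor):
        target = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        attr = attr.to(target)
    setattr(self, name, attr)

The same reasoning applies to the identical register_buffer edits in the three sampler files that follow.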
ldm/models/diffusion/ddim.py CHANGED
@@ -15,9 +15,10 @@ class DDIMSampler(object):
         self.schedule = schedule
 
     def register_buffer(self, name, attr):
-        if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+        # Do not force module to cuda by default.
+        #if type(attr) == torch.Tensor:
+        #    if attr.device != torch.device("cuda"):
+        #        attr = attr.to(torch.device("cuda"))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
ldm/models/diffusion/dpm_solver/sampler.py CHANGED
@@ -18,9 +18,10 @@ class DPMSolverSampler(object):
         self.register_buffer('alphas_cumprod', to_torch(model.alphas_cumprod))
 
     def register_buffer(self, name, attr):
-        if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+        # This is in the original sampler.py, but it is forcing the attr to 'cuda' instead of the default device.
+        #if type(attr) == torch.Tensor:
+        #    if attr.device != torch.device("cuda"):
+        #        attr = attr.to(torch.device("cuda"))
         setattr(self, name, attr)
 
     @torch.no_grad()
ldm/models/diffusion/plms.py CHANGED
@@ -17,9 +17,10 @@ class PLMSSampler(object):
         self.schedule = schedule
 
     def register_buffer(self, name, attr):
-        if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+        # Do not force module to CUDA by default.
+        #if type(attr) == torch.Tensor:
+        #    if attr.device != torch.device("cuda"):
+        #        attr = attr.to(torch.device("cuda"))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
ldm/modules/encoders/modules.py CHANGED
@@ -8,6 +8,9 @@ import open_clip
 from ldm.util import default, count_params
 
 
+default_device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
+
+
 class AbstractEncoder(nn.Module):
     def __init__(self):
         super().__init__()
@@ -42,7 +45,9 @@ class ClassEmbedder(nn.Module):
         c = self.embedding(c)
         return c
 
-    def get_unconditional_conditioning(self, bs, device="cuda"):
+    def get_unconditional_conditioning(self, bs, device=None):
+        if device is None:
+            device = default_device
         uc_class = self.n_classes - 1  # 1000 classes --> 0 ... 999, one extra class for ucg (class 1000)
         uc = torch.ones((bs,), device=device) * uc_class
         uc = {self.key: uc}
@@ -57,8 +62,10 @@ def disabled_train(self, mode=True):
 
 class FrozenT5Embedder(AbstractEncoder):
     """Uses the T5 transformer encoder for text"""
-    def __init__(self, version="google/t5-v1_1-large", device="cuda", max_length=77, freeze=True):  # others are google/t5-v1_1-xl and google/t5-v1_1-xxl
+    def __init__(self, version="google/t5-v1_1-large", device=None, max_length=77, freeze=True):  # others are google/t5-v1_1-xl and google/t5-v1_1-xxl
         super().__init__()
+        if device is None:
+            device = default_device
         self.tokenizer = T5Tokenizer.from_pretrained(version)
         self.transformer = T5EncoderModel.from_pretrained(version)
         self.device = device
@@ -92,9 +99,11 @@ class FrozenCLIPEmbedder(AbstractEncoder):
         "pooled",
         "hidden"
     ]
-    def __init__(self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77,
+    def __init__(self, version="openai/clip-vit-large-patch14", device=None, max_length=77,
                  freeze=True, layer="last", layer_idx=None):  # clip-vit-base-patch32
         super().__init__()
+        if device is None:
+            device = default_device
         assert layer in self.LAYERS
         self.tokenizer = CLIPTokenizer.from_pretrained(version)
         self.transformer = CLIPTextModel.from_pretrained(version)
@@ -140,9 +149,11 @@ class FrozenOpenCLIPEmbedder(AbstractEncoder):
         "last",
         "penultimate"
     ]
-    def __init__(self, arch="ViT-H-14", version="laion2b_s32b_b79k", device="cuda", max_length=77,
+    def __init__(self, arch="ViT-H-14", version="laion2b_s32b_b79k", device=None, max_length=77,
                  freeze=True, layer="last"):
         super().__init__()
+        if device is None:
+            device = default_device
         assert layer in self.LAYERS
         model, _, _ = open_clip.create_model_and_transforms(arch, device=torch.device('cpu'), pretrained=version)
         del model.visual
@@ -194,9 +205,11 @@ class FrozenOpenCLIPEmbedder(AbstractEncoder):
 
 
 class FrozenCLIPT5Encoder(AbstractEncoder):
-    def __init__(self, clip_version="openai/clip-vit-large-patch14", t5_version="google/t5-v1_1-xl", device="cuda",
+    def __init__(self, clip_version="openai/clip-vit-large-patch14", t5_version="google/t5-v1_1-xl", device=None,
                  clip_max_length=77, t5_max_length=77):
         super().__init__()
+        if device is None:
+            device = default_device
         self.clip_encoder = FrozenCLIPEmbedder(clip_version, device, max_length=clip_max_length)
         self.t5_encoder = FrozenT5Embedder(t5_version, device, max_length=t5_max_length)
         print(f"{self.clip_encoder.__class__.__name__} has {count_params(self.clip_encoder)*1.e-6:.2f} M parameters, "
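The net effect of the modules.py changes: every encoder now accepts device=None and falls back to a module-level default_device that resolves to "cuda" only when a GPU is available. A minimal, self-contained sketch of the pattern (EncoderLike is a hypothetical class, not one of the encoders above):

import torch

default_device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")

class EncoderLike:
    def __init__(self, device=None):
        # None means "use whatever this machine has"; an explicit device still wins.
        if device is None:
            device = default_device
        self.device = torch.device(device)

print(EncoderLike().device)        # default_device: cuda if available, else cpu
print(EncoderLike("cpu").device)   # explicit override is unchanged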