Joseph Catrambone committed on
Commit 3dbb2cf
1 Parent(s): b5ecd5f

Prevent models from forcing tensors to CUDA. Increase the default max_faces from 1 to 5.

app.py CHANGED
@@ -86,7 +86,7 @@ with block:
             run_button = gr.Button(label="Run")
             with gr.Accordion("Advanced options", open=False):
                 num_samples = gr.Slider(label="Images", minimum=1, maximum=12, value=1, step=1)
-                max_faces = gr.Slider(label="Max Faces", minimum=1, maximum=5, value=1, step=1)
+                max_faces = gr.Slider(label="Max Faces", minimum=1, maximum=10, value=5, step=1)
                 min_confidence = gr.Slider(label="Min Confidence", minimum=0.01, maximum=1.0, value=0.5, step=0.01)
                 strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
                 guess_mode = gr.Checkbox(label='Guess Mode', value=False)
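For context, the slider value typically feeds straight into the face detector. A minimal sketch, assuming this app uses MediaPipe FaceMesh (the FaceMesh arguments are the real MediaPipe API; the detect_faces wrapper and its wiring to the sliders are illustrative):

    import mediapipe as mp

    def detect_faces(image_rgb, max_faces=5, min_confidence=0.5):
        # max_num_faces caps how many faces FaceMesh returns per image; the new
        # default of 5 lets group shots be annotated without touching the slider.
        with mp.solutions.face_mesh.FaceMesh(
                static_image_mode=True,
                max_num_faces=max_faces,
                min_detection_confidence=min_confidence) as fm:
            return fm.process(image_rgb).multi_face_landmarks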
cldm/ddim_hacked.py CHANGED
@@ -15,9 +15,10 @@ class DDIMSampler(object):
         self.schedule = schedule
 
     def register_buffer(self, name, attr):
-        if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+        # Do not force attr to CUDA device by default. It may not exist.
+        #if type(attr) == torch.Tensor:
+        #    if attr.device != torch.device("cuda"):
+        #        attr = attr.to(torch.device("cuda"))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
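The same hard-coded move is commented out in the three stock samplers below. If automatic placement were still wanted, a device-agnostic sketch (not what this commit does) would follow the wrapped model's device instead of assuming CUDA exists:

    import torch

    def register_buffer(self, name, attr):
        # Alternative sketch: move tensors to wherever the wrapped model
        # already lives (CPU, CUDA, or MPS) rather than hard-coding "cuda".
        # Assumes self.model is the nn.Module the sampler was built with.
        if isinstance(attr, torch.Tensor):
            attr = attr.to(next(self.model.parameters()).device)
        setattr(self, name, attr)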
ldm/models/diffusion/ddim.py CHANGED
@@ -15,9 +15,10 @@ class DDIMSampler(object):
         self.schedule = schedule
 
     def register_buffer(self, name, attr):
-        if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+        # Do not force module to cuda by default.
+        #if type(attr) == torch.Tensor:
+        #    if attr.device != torch.device("cuda"):
+        #        attr = attr.to(torch.device("cuda"))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
ldm/models/diffusion/dpm_solver/sampler.py CHANGED
@@ -18,9 +18,10 @@ class DPMSolverSampler(object):
         self.register_buffer('alphas_cumprod', to_torch(model.alphas_cumprod))
 
     def register_buffer(self, name, attr):
-        if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+        # This is in the original sampler.py, but it is forcing the attr to 'cuda' instead of the default device.
+        #if type(attr) == torch.Tensor:
+        #    if attr.device != torch.device("cuda"):
+        #        attr = attr.to(torch.device("cuda"))
         setattr(self, name, attr)
 
     @torch.no_grad()
ldm/models/diffusion/plms.py CHANGED
@@ -17,9 +17,10 @@ class PLMSSampler(object):
         self.schedule = schedule
 
     def register_buffer(self, name, attr):
-        if type(attr) == torch.Tensor:
-            if attr.device != torch.device("cuda"):
-                attr = attr.to(torch.device("cuda"))
+        # Do not force module to CUDA by default.
+        #if type(attr) == torch.Tensor:
+        #    if attr.device != torch.device("cuda"):
+        #        attr = attr.to(torch.device("cuda"))
         setattr(self, name, attr)
 
     def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
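A quick smoke test for the four sampler edits on a CPU-only box (hypothetical snippet; _Dummy stands in for the diffusion model and provides only the num_timesteps attribute that DDIMSampler.__init__ reads):

    import torch
    from ldm.models.diffusion.ddim import DDIMSampler

    class _Dummy:
        num_timesteps = 1000  # hypothetical stand-in for the diffusion model

    sampler = DDIMSampler(_Dummy())
    sampler.register_buffer("betas", torch.linspace(1e-4, 2e-2, 1000))
    print(sampler.betas.device)  # cpu -- the old code raised here without CUDA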
ldm/modules/encoders/modules.py CHANGED
@@ -8,6 +8,9 @@ import open_clip
 from ldm.util import default, count_params
 
 
+default_device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
+
+
 class AbstractEncoder(nn.Module):
     def __init__(self):
         super().__init__()
@@ -42,7 +45,9 @@ class ClassEmbedder(nn.Module):
         c = self.embedding(c)
         return c
 
-    def get_unconditional_conditioning(self, bs, device="cuda"):
+    def get_unconditional_conditioning(self, bs, device=None):
+        if device is None:
+            device = default_device
         uc_class = self.n_classes - 1  # 1000 classes --> 0 ... 999, one extra class for ucg (class 1000)
         uc = torch.ones((bs,), device=device) * uc_class
         uc = {self.key: uc}
@@ -57,8 +62,10 @@ def disabled_train(self, mode=True):
 
 class FrozenT5Embedder(AbstractEncoder):
     """Uses the T5 transformer encoder for text"""
-    def __init__(self, version="google/t5-v1_1-large", device="cuda", max_length=77, freeze=True):  # others are google/t5-v1_1-xl and google/t5-v1_1-xxl
+    def __init__(self, version="google/t5-v1_1-large", device=None, max_length=77, freeze=True):  # others are google/t5-v1_1-xl and google/t5-v1_1-xxl
         super().__init__()
+        if device is None:
+            device = default_device
         self.tokenizer = T5Tokenizer.from_pretrained(version)
         self.transformer = T5EncoderModel.from_pretrained(version)
         self.device = device
@@ -92,9 +99,11 @@ class FrozenCLIPEmbedder(AbstractEncoder):
         "pooled",
         "hidden"
     ]
-    def __init__(self, version="openai/clip-vit-large-patch14", device="cuda", max_length=77,
+    def __init__(self, version="openai/clip-vit-large-patch14", device=None, max_length=77,
                  freeze=True, layer="last", layer_idx=None):  # clip-vit-base-patch32
         super().__init__()
+        if device is None:
+            device = default_device
         assert layer in self.LAYERS
         self.tokenizer = CLIPTokenizer.from_pretrained(version)
         self.transformer = CLIPTextModel.from_pretrained(version)
@@ -140,9 +149,11 @@ class FrozenOpenCLIPEmbedder(AbstractEncoder):
         "last",
         "penultimate"
     ]
-    def __init__(self, arch="ViT-H-14", version="laion2b_s32b_b79k", device="cuda", max_length=77,
+    def __init__(self, arch="ViT-H-14", version="laion2b_s32b_b79k", device=None, max_length=77,
                  freeze=True, layer="last"):
         super().__init__()
+        if device is None:
+            device = default_device
         assert layer in self.LAYERS
         model, _, _ = open_clip.create_model_and_transforms(arch, device=torch.device('cpu'), pretrained=version)
         del model.visual
@@ -194,9 +205,11 @@ class FrozenOpenCLIPEmbedder(AbstractEncoder):
 
 
 class FrozenCLIPT5Encoder(AbstractEncoder):
-    def __init__(self, clip_version="openai/clip-vit-large-patch14", t5_version="google/t5-v1_1-xl", device="cuda",
+    def __init__(self, clip_version="openai/clip-vit-large-patch14", t5_version="google/t5-v1_1-xl", device=None,
                  clip_max_length=77, t5_max_length=77):
         super().__init__()
+        if device is None:
+            device = default_device
         self.clip_encoder = FrozenCLIPEmbedder(clip_version, device, max_length=clip_max_length)
         self.t5_encoder = FrozenT5Embedder(t5_version, device, max_length=t5_max_length)
         print(f"{self.clip_encoder.__class__.__name__} has {count_params(self.clip_encoder)*1.e-6:.2f} M parameters, "