NIRVANALAN committed on
Commit
14db06e
1 Parent(s): f944436
app.py CHANGED
@@ -106,10 +106,15 @@ def check_input_image(input_image):
106
 
107
 
108
  def main(args):
 
 
 
 
 
109
 
110
  # args.rendering_kwargs = rendering_options_defaults(args)
111
 
112
- # dist_util.setup_dist(args)
113
  logger.configure(dir=args.logdir)
114
 
115
  th.cuda.empty_cache()
@@ -207,7 +212,7 @@ def main(args):
207
  loss_class=None,
208
  data=data,
209
  eval_data=None,
210
- **vars(args))
211
 
212
  @spaces.GPU(duration=200)
213
  def reconstruct_and_export(*args, **kwargs):
 
106
 
107
 
108
  def main(args):
109
+ os.environ['MASTER_ADDR'] = 'localhost'
110
+ os.environ['MASTER_PORT'] = '12355'
111
+ os.environ["CUDA_VISIBLE_DEVICES"] = "0"
112
+ os.environ["RANK"] = "0"
113
+ os.environ["WORLD_SIZE"] = "1"
114
 
115
  # args.rendering_kwargs = rendering_options_defaults(args)
116
 
117
+ dist_util.setup_dist(args)
118
  logger.configure(dir=args.logdir)
119
 
120
  th.cuda.empty_cache()
 
212
  loss_class=None,
213
  data=data,
214
  eval_data=None,
215
+ **args)
216
 
217
  @spaces.GPU(duration=200)
218
  def reconstruct_and_export(*args, **kwargs):
dit/__pycache__/dit_decoder.cpython-310.pyc ADDED
Binary file (5.97 kB). View file
 
dit/__pycache__/dit_i23d.cpython-310.pyc CHANGED
Binary files a/dit/__pycache__/dit_i23d.cpython-310.pyc and b/dit/__pycache__/dit_i23d.cpython-310.pyc differ
 
dit/__pycache__/dit_models_xformers.cpython-310.pyc CHANGED
Binary files a/dit/__pycache__/dit_models_xformers.cpython-310.pyc and b/dit/__pycache__/dit_models_xformers.cpython-310.pyc differ
 
dit/__pycache__/dit_trilatent.cpython-310.pyc CHANGED
Binary files a/dit/__pycache__/dit_trilatent.cpython-310.pyc and b/dit/__pycache__/dit_trilatent.cpython-310.pyc differ
 
dit/__pycache__/norm.cpython-310.pyc ADDED
Binary file (1.14 kB). View file
 
ldm/modules/__pycache__/attention.cpython-310.pyc CHANGED
Binary files a/ldm/modules/__pycache__/attention.cpython-310.pyc and b/ldm/modules/__pycache__/attention.cpython-310.pyc differ
 
logs/LSGM/inference/Objaverse/i23d/dit-L2/log.txt ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Logging to ./logs/LSGM/inference/Objaverse/i23d/dit-L2/
2
+ creating model and diffusion...
3
+ creating 3DAE...
4
+ length of vit_decoder.blocks: 24
5
+ init pos_embed with sincos
6
+ length of vit_decoder.blocks: 24
7
+ ignore dim_up_mlp: True
8
+ AE(
9
+ (encoder): MVEncoderGSDynamicInp(
10
+ (conv_in): Conv2d(10, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
11
+ (down): ModuleList(
12
+ (0): Module(
13
+ (block): ModuleList(
14
+ (0): ResnetBlock(
15
+ (norm1): GroupNorm(32, 64, eps=1e-06, affine=True)
16
+ (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
17
+ (norm2): GroupNorm(32, 64, eps=1e-06, affine=True)
18
+ (dropout): Dropout(p=0.0, inplace=False)
19
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
20
+ )
21
+ )
22
+ (attn): ModuleList()
23
+ (downsample): Downsample(
24
+ (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2))
25
+ )
26
+ )
27
+ (1): Module(
28
+ (block): ModuleList(
29
+ (0): ResnetBlock(
30
+ (norm1): GroupNorm(32, 64, eps=1e-06, affine=True)
31
+ (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
32
+ (norm2): GroupNorm(32, 128, eps=1e-06, affine=True)
33
+ (dropout): Dropout(p=0.0, inplace=False)
34
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
35
+ (nin_shortcut): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1))
36
+ )
37
+ )
38
+ (attn): ModuleList()
39
+ (downsample): Downsample(
40
+ (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2))
41
+ )
42
+ )
43
+ (2): Module(
44
+ (block): ModuleList(
45
+ (0): ResnetBlock(
46
+ (norm1): GroupNorm(32, 128, eps=1e-06, affine=True)
47
+ (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
48
+ (norm2): GroupNorm(32, 256, eps=1e-06, affine=True)
49
+ (dropout): Dropout(p=0.0, inplace=False)
50
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
51
+ (nin_shortcut): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
52
+ )
53
+ )
54
+ (attn): ModuleList()
55
+ (downsample): Downsample(
56
+ (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2))
57
+ )
58
+ )
59
+ (3): Module(
60
+ (block): ModuleList(
61
+ (0): ResnetBlock(
62
+ (norm1): GroupNorm(32, 256, eps=1e-06, affine=True)
63
+ (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
64
+ (norm2): GroupNorm(32, 256, eps=1e-06, affine=True)
65
+ (dropout): Dropout(p=0.0, inplace=False)
66
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
67
+ )
68
+ )
69
+ (attn): ModuleList()
70
+ )
71
+ )
72
+ (mid): Module(
73
+ (block_1): ResnetBlock(
74
+ (norm1): GroupNorm(32, 256, eps=1e-06, affine=True)
75
+ (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
76
+ (norm2): GroupNorm(32, 256, eps=1e-06, affine=True)
77
+ (dropout): Dropout(p=0.0, inplace=False)
78
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
79
+ )
80
+ (attn_1): SpatialTransformer3D(
81
+ (norm): GroupNorm(32, 256, eps=1e-06, affine=True)
82
+ (proj_in): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1))
83
+ (transformer_blocks): ModuleList(
84
+ (0): BasicTransformerBlock3D(
85
+ (attn1): MemoryEfficientCrossAttention(
86
+ (to_q): Linear(in_features=512, out_features=512, bias=False)
87
+ (to_k): Linear(in_features=512, out_features=512, bias=False)
88
+ (q_norm): Identity()
89
+ (k_norm): Identity()
90
+ (to_v): Linear(in_features=512, out_features=512, bias=False)
91
+ (to_out): Sequential(
92
+ (0): Linear(in_features=512, out_features=512, bias=True)
93
+ (1): Dropout(p=0.0, inplace=False)
94
+ )
95
+ )
96
+ (ff): FeedForward(
97
+ (net): Sequential(
98
+ (0): GEGLU(
99
+ (proj): Linear(in_features=512, out_features=4096, bias=True)
100
+ )
101
+ (1): Dropout(p=0.0, inplace=False)
102
+ (2): Linear(in_features=2048, out_features=512, bias=True)
103
+ )
104
+ )
105
+ (attn2): MemoryEfficientCrossAttention(
106
+ (to_q): Linear(in_features=512, out_features=512, bias=False)
107
+ (to_k): Linear(in_features=512, out_features=512, bias=False)
108
+ (q_norm): Identity()
109
+ (k_norm): Identity()
110
+ (to_v): Linear(in_features=512, out_features=512, bias=False)
111
+ (to_out): Sequential(
112
+ (0): Linear(in_features=512, out_features=512, bias=True)
113
+ (1): Dropout(p=0.0, inplace=False)
114
+ )
115
+ )
116
+ (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
117
+ (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
118
+ (norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
119
+ )
120
+ )
121
+ (proj_out): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
122
+ )
123
+ (block_2): ResnetBlock(
124
+ (norm1): GroupNorm(32, 256, eps=1e-06, affine=True)
125
+ (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
126
+ (norm2): GroupNorm(32, 256, eps=1e-06, affine=True)
127
+ (dropout): Dropout(p=0.0, inplace=False)
128
+ (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
129
+ )
130
+ )
131
+ (norm_out): GroupNorm(32, 256, eps=1e-06, affine=True)
132
+ (conv_out): Conv2d(256, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
133
+ )
134
+ (decoder): RodinSR_256_fusionv6_ConvQuant_liteSR_dinoInit3DAttn_SD_B_3L_C_withrollout_withSD_D_ditDecoder(
135
+ (superresolution): ModuleDict(
136
+ (ldm_upsample): PatchEmbedTriplane(
137
+ (proj): Conv2d(12, 3072, kernel_size=(2, 2), stride=(2, 2), groups=3)
138
+ (norm): Identity()
139
+ )
140
+ (quant_conv): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), groups=3)
141
+ (conv_sr): Decoder(
142
+ (conv_in): Conv2d(1024, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
143
+ (mid): Module(
144
+ (block_1): ResnetBlock(
145
+ (norm1): GroupNorm(32, 128, eps=1e-06, affine=True)
146
+ (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
147
+ (norm2): GroupNorm(32, 128, eps=1e-06, affine=True)
148
+ (dropout): Dropout(p=0.0, inplace=False)
149
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
150
+ )
151
+ (attn_1): MemoryEfficientAttnBlock(
152
+ (norm): GroupNorm(32, 128, eps=1e-06, affine=True)
153
+ (q): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
154
+ (k): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
155
+ (v): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
156
+ (proj_out): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
157
+ )
158
+ (block_2): ResnetBlock(
159
+ (norm1): GroupNorm(32, 128, eps=1e-06, affine=True)
160
+ (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
161
+ (norm2): GroupNorm(32, 128, eps=1e-06, affine=True)
162
+ (dropout): Dropout(p=0.0, inplace=False)
163
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
164
+ )
165
+ )
166
+ (up): ModuleList(
167
+ (0): Module(
168
+ (block): ModuleList(
169
+ (0): ResnetBlock(
170
+ (norm1): GroupNorm(32, 64, eps=1e-06, affine=True)
171
+ (conv1): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
172
+ (norm2): GroupNorm(32, 32, eps=1e-06, affine=True)
173
+ (dropout): Dropout(p=0.0, inplace=False)
174
+ (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
175
+ (nin_shortcut): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
176
+ )
177
+ (1): ResnetBlock(
178
+ (norm1): GroupNorm(32, 32, eps=1e-06, affine=True)
179
+ (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
180
+ (norm2): GroupNorm(32, 32, eps=1e-06, affine=True)
181
+ (dropout): Dropout(p=0.0, inplace=False)
182
+ (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
183
+ )
184
+ )
185
+ (attn): ModuleList()
186
+ )
187
+ (1): Module(
188
+ (block): ModuleList(
189
+ (0-1): 2 x ResnetBlock(
190
+ (norm1): GroupNorm(32, 64, eps=1e-06, affine=True)
191
+ (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
192
+ (norm2): GroupNorm(32, 64, eps=1e-06, affine=True)
193
+ (dropout): Dropout(p=0.0, inplace=False)
194
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
195
+ )
196
+ )
197
+ (attn): ModuleList()
198
+ (upsample): Upsample(
199
+ (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
200
+ )
201
+ )
202
+ (2): Module(
203
+ (block): ModuleList(
204
+ (0): ResnetBlock(
205
+ (norm1): GroupNorm(32, 128, eps=1e-06, affine=True)
206
+ (conv1): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
207
+ (norm2): GroupNorm(32, 64, eps=1e-06, affine=True)
208
+ (dropout): Dropout(p=0.0, inplace=False)
209
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
210
+ (nin_shortcut): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1))
211
+ )
212
+ (1): ResnetBlock(
213
+ (norm1): GroupNorm(32, 64, eps=1e-06, affine=True)
214
+ (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
215
+ (norm2): GroupNorm(32, 64, eps=1e-06, affine=True)
216
+ (dropout): Dropout(p=0.0, inplace=False)
217
+ (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
218
+ )
219
+ )
220
+ (attn): ModuleList()
221
+ (upsample): Upsample(
222
+ (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
223
+ )
224
+ )
225
+ (3): Module(
226
+ (block): ModuleList(
227
+ (0-1): 2 x ResnetBlock(
228
+ (norm1): GroupNorm(32, 128, eps=1e-06, affine=True)
229
+ (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
230
+ (norm2): GroupNorm(32, 128, eps=1e-06, affine=True)
231
+ (dropout): Dropout(p=0.0, inplace=False)
232
+ (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
233
+ )
234
+ )
235
+ (attn): ModuleList()
236
+ (upsample): Upsample(
237
+ (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
238
+ )
239
+ )
240
+ )
241
+ (norm_out): GroupNorm(32, 32, eps=1e-06, affine=True)
242
+ (conv_out): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
243
+ )
244
+ )
245
+ (vit_decoder): DiT2(
246
+ (blocks): ModuleList(
247
+ (0-23): 24 x DiTBlock2(
248
+ (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=False)
249
+ (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=False)
250
+ (attn): MemEffAttention(
251
+ (qkv): Linear(in_features=1024, out_features=3072, bias=True)
252
+ (attn_drop): Dropout(p=0.0, inplace=False)
253
+ (proj): Linear(in_features=1024, out_features=1024, bias=True)
254
+ (proj_drop): Dropout(p=0.0, inplace=False)
255
+ (q_norm): Identity()
256
+ (k_norm): Identity()
257
+ )
258
+ (mlp): FusedMLP(
259
+ (mlp): Sequential(
260
+ (0): Linear(in_features=1024, out_features=4096, bias=False)
261
+ (1): FusedDropoutBias(
262
+ (activation_pytorch): GELU(approximate='none')
263
+ )
264
+ (2): Linear(in_features=4096, out_features=1024, bias=False)
265
+ (3): FusedDropoutBias(
266
+ (activation_pytorch): Identity()
267
+ )
268
+ )
269
+ )
270
+ (adaLN_modulation): Sequential(
271
+ (0): SiLU()
272
+ (1): Linear(in_features=1024, out_features=6144, bias=True)
273
+ )
274
+ )
275
+ )
276
+ )
277
+ (triplane_decoder): Triplane(
278
+ (renderer): ImportanceRenderer(
279
+ (ray_marcher): MipRayMarcher2()
280
+ )
281
+ (ray_sampler): PatchRaySampler()
282
+ (decoder): OSGDecoder(
283
+ (net): Sequential(
284
+ (0): FullyConnectedLayer(in_features=32, out_features=64, activation=linear)
285
+ (1): Softplus(beta=1.0, threshold=20.0)
286
+ (2): FullyConnectedLayer(in_features=64, out_features=4, activation=linear)
287
+ )
288
+ )
289
+ )
290
+ (decoder_pred): None
291
+ )
292
+ )
293
+ create dataset
294
+ joint_denoise_rec_model enables AMP to accelerate training
logs/LSGM/inference/Objaverse/i23d/dit-L2/progress.csv ADDED
File without changes
nsr/__pycache__/train_util_diffusion.cpython-310.pyc CHANGED
Binary files a/nsr/__pycache__/train_util_diffusion.cpython-310.pyc and b/nsr/__pycache__/train_util_diffusion.cpython-310.pyc differ
 
vit/__pycache__/vision_transformer.cpython-310.pyc CHANGED
Binary files a/vit/__pycache__/vision_transformer.cpython-310.pyc and b/vit/__pycache__/vision_transformer.cpython-310.pyc differ