Logging to ./
creating model and diffusion...
creating 3DAE...
length of vit_decoder.blocks: 24
init pos_embed with sincos
length of vit_decoder.blocks: 24
ignore dim_up_mlp: True
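The "init pos_embed with sincos" line means the ViT decoder's positional embedding is initialized from a fixed 2D sine-cosine table rather than learned from scratch (the scheme popularized by MAE). A minimal sketch of that initialization, assuming the standard MAE-style formulation rather than this repo's exact helper:

    import numpy as np

    def get_1d_sincos_pos_embed(embed_dim, pos):
        # Fixed sinusoidal table: half of the channels are sin, half cos.
        omega = 1.0 / 10000 ** (np.arange(embed_dim // 2) / (embed_dim / 2.0))
        out = np.outer(pos, omega)                                  # (N, D/2)
        return np.concatenate([np.sin(out), np.cos(out)], axis=1)  # (N, D)

    def get_2d_sincos_pos_embed(embed_dim, grid_size):
        # Half of the channels encode the row index, half the column index.
        ys, xs = np.meshgrid(np.arange(grid_size), np.arange(grid_size), indexing="ij")
        emb_h = get_1d_sincos_pos_embed(embed_dim // 2, ys.reshape(-1))
        emb_w = get_1d_sincos_pos_embed(embed_dim // 2, xs.reshape(-1))
        return np.concatenate([emb_h, emb_w], axis=1)  # (grid_size**2, embed_dim)

    # e.g. for the 1024-dim DiT2 decoder printed below (grid_size is illustrative):
    # pos_embed = get_2d_sincos_pos_embed(1024, grid_size=16)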
AE(
  (encoder): MVEncoderGSDynamicInp(
    (conv_in): Conv2d(10, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (down): ModuleList(
      (0): Module(
        (block): ModuleList(
          (0): ResnetBlock(
            (norm1): GroupNorm(32, 64, eps=1e-06, affine=True)
            (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            (norm2): GroupNorm(32, 64, eps=1e-06, affine=True)
            (dropout): Dropout(p=0.0, inplace=False)
            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          )
        )
        (attn): ModuleList()
        (downsample): Downsample(
          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2))
        )
      )
      (1): Module(
        (block): ModuleList(
          (0): ResnetBlock(
            (norm1): GroupNorm(32, 64, eps=1e-06, affine=True)
            (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            (norm2): GroupNorm(32, 128, eps=1e-06, affine=True)
            (dropout): Dropout(p=0.0, inplace=False)
            (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            (nin_shortcut): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1))
          )
        )
        (attn): ModuleList()
        (downsample): Downsample(
          (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2))
        )
      )
      (2): Module(
        (block): ModuleList(
          (0): ResnetBlock(
            (norm1): GroupNorm(32, 128, eps=1e-06, affine=True)
            (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            (norm2): GroupNorm(32, 256, eps=1e-06, affine=True)
            (dropout): Dropout(p=0.0, inplace=False)
            (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            (nin_shortcut): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
          )
        )
        (attn): ModuleList()
        (downsample): Downsample(
          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2))
        )
      )
      (3): Module(
        (block): ModuleList(
          (0): ResnetBlock(
            (norm1): GroupNorm(32, 256, eps=1e-06, affine=True)
            (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            (norm2): GroupNorm(32, 256, eps=1e-06, affine=True)
            (dropout): Dropout(p=0.0, inplace=False)
            (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          )
        )
        (attn): ModuleList()
      )
    )
    (mid): Module(
      (block_1): ResnetBlock(
        (norm1): GroupNorm(32, 256, eps=1e-06, affine=True)
        (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (norm2): GroupNorm(32, 256, eps=1e-06, affine=True)
        (dropout): Dropout(p=0.0, inplace=False)
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
      (attn_1): SpatialTransformer3D(
        (norm): GroupNorm(32, 256, eps=1e-06, affine=True)
        (proj_in): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1))
        (transformer_blocks): ModuleList(
          (0): BasicTransformerBlock3D(
            (attn1): MemoryEfficientCrossAttention(
              (to_q): Linear(in_features=512, out_features=512, bias=False)
              (to_k): Linear(in_features=512, out_features=512, bias=False)
              (q_norm): Identity()
              (k_norm): Identity()
              (to_v): Linear(in_features=512, out_features=512, bias=False)
              (to_out): Sequential(
                (0): Linear(in_features=512, out_features=512, bias=True)
                (1): Dropout(p=0.0, inplace=False)
              )
            )
            (ff): FeedForward(
              (net): Sequential(
                (0): GEGLU(
                  (proj): Linear(in_features=512, out_features=4096, bias=True)
                )
                (1): Dropout(p=0.0, inplace=False)
                (2): Linear(in_features=2048, out_features=512, bias=True)
              )
            )
            (attn2): MemoryEfficientCrossAttention(
              (to_q): Linear(in_features=512, out_features=512, bias=False)
              (to_k): Linear(in_features=512, out_features=512, bias=False)
              (q_norm): Identity()
              (k_norm): Identity()
              (to_v): Linear(in_features=512, out_features=512, bias=False)
              (to_out): Sequential(
                (0): Linear(in_features=512, out_features=512, bias=True)
                (1): Dropout(p=0.0, inplace=False)
              )
            )
            (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
            (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
            (norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
          )
        )
        (proj_out): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
      )
      (block_2): ResnetBlock(
        (norm1): GroupNorm(32, 256, eps=1e-06, affine=True)
        (conv1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (norm2): GroupNorm(32, 256, eps=1e-06, affine=True)
        (dropout): Dropout(p=0.0, inplace=False)
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
    )
    (norm_out): GroupNorm(32, 256, eps=1e-06, affine=True)
    (conv_out): Conv2d(256, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (decoder): RodinSR_256_fusionv6_ConvQuant_liteSR_dinoInit3DAttn_SD_B_3L_C_withrollout_withSD_D_ditDecoder(
    (superresolution): ModuleDict(
      (ldm_upsample): PatchEmbedTriplane(
        (proj): Conv2d(12, 3072, kernel_size=(2, 2), stride=(2, 2), groups=3)
        (norm): Identity()
      )
      (quant_conv): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), groups=3)
      (conv_sr): Decoder(
        (conv_in): Conv2d(1024, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (mid): Module(
          (block_1): ResnetBlock(
            (norm1): GroupNorm(32, 128, eps=1e-06, affine=True)
            (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            (norm2): GroupNorm(32, 128, eps=1e-06, affine=True)
            (dropout): Dropout(p=0.0, inplace=False)
            (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          )
          (attn_1): MemoryEfficientAttnBlock(
            (norm): GroupNorm(32, 128, eps=1e-06, affine=True)
            (q): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
            (k): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
            (v): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
            (proj_out): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
          )
          (block_2): ResnetBlock(
            (norm1): GroupNorm(32, 128, eps=1e-06, affine=True)
            (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            (norm2): GroupNorm(32, 128, eps=1e-06, affine=True)
            (dropout): Dropout(p=0.0, inplace=False)
            (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          )
        )
        (up): ModuleList(
          (0): Module(
            (block): ModuleList(
              (0): ResnetBlock(
                (norm1): GroupNorm(32, 64, eps=1e-06, affine=True)
                (conv1): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                (norm2): GroupNorm(32, 32, eps=1e-06, affine=True)
                (dropout): Dropout(p=0.0, inplace=False)
                (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                (nin_shortcut): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1))
              )
              (1): ResnetBlock(
                (norm1): GroupNorm(32, 32, eps=1e-06, affine=True)
                (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                (norm2): GroupNorm(32, 32, eps=1e-06, affine=True)
                (dropout): Dropout(p=0.0, inplace=False)
                (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
              )
            )
            (attn): ModuleList()
          )
          (1): Module(
            (block): ModuleList(
              (0-1): 2 x ResnetBlock(
                (norm1): GroupNorm(32, 64, eps=1e-06, affine=True)
                (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                (norm2): GroupNorm(32, 64, eps=1e-06, affine=True)
                (dropout): Dropout(p=0.0, inplace=False)
                (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
              )
            )
            (attn): ModuleList()
            (upsample): Upsample(
              (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            )
          )
          (2): Module(
            (block): ModuleList(
              (0): ResnetBlock(
                (norm1): GroupNorm(32, 128, eps=1e-06, affine=True)
                (conv1): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                (norm2): GroupNorm(32, 64, eps=1e-06, affine=True)
                (dropout): Dropout(p=0.0, inplace=False)
                (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                (nin_shortcut): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1))
              )
              (1): ResnetBlock(
                (norm1): GroupNorm(32, 64, eps=1e-06, affine=True)
                (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                (norm2): GroupNorm(32, 64, eps=1e-06, affine=True)
                (dropout): Dropout(p=0.0, inplace=False)
                (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
              )
            )
            (attn): ModuleList()
            (upsample): Upsample(
              (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            )
          )
          (3): Module(
            (block): ModuleList(
              (0-1): 2 x ResnetBlock(
                (norm1): GroupNorm(32, 128, eps=1e-06, affine=True)
                (conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
                (norm2): GroupNorm(32, 128, eps=1e-06, affine=True)
                (dropout): Dropout(p=0.0, inplace=False)
                (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
              )
            )
            (attn): ModuleList()
            (upsample): Upsample(
              (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            )
          )
        )
        (norm_out): GroupNorm(32, 32, eps=1e-06, affine=True)
        (conv_out): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
    )
    (vit_decoder): DiT2(
      (blocks): ModuleList(
        (0-23): 24 x DiTBlock2(
          (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=False)
          (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=False)
          (attn): MemEffAttention(
            (qkv): Linear(in_features=1024, out_features=3072, bias=True)
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=1024, out_features=1024, bias=True)
            (proj_drop): Dropout(p=0.0, inplace=False)
            (q_norm): Identity()
            (k_norm): Identity()
          )
          (mlp): FusedMLP(
            (mlp): Sequential(
              (0): Linear(in_features=1024, out_features=4096, bias=False)
              (1): FusedDropoutBias(
                (activation_pytorch): GELU(approximate='none')
              )
              (2): Linear(in_features=4096, out_features=1024, bias=False)
              (3): FusedDropoutBias(
                (activation_pytorch): Identity()
              )
            )
          )
          (adaLN_modulation): Sequential(
            (0): SiLU()
            (1): Linear(in_features=1024, out_features=6144, bias=True)
          )
        )
      )
    )
    (triplane_decoder): Triplane(
      (renderer): ImportanceRenderer(
        (ray_marcher): MipRayMarcher2()
      )
      (ray_sampler): PatchRaySampler()
      (decoder): OSGDecoder(
        (net): Sequential(
          (0): FullyConnectedLayer(in_features=32, out_features=64, activation=linear)
          (1): Softplus(beta=1.0, threshold=20.0)
          (2): FullyConnectedLayer(in_features=64, out_features=4, activation=linear)
        )
      )
    )
    (decoder_pred): None
  )
)
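Two dimension choices in the printout above are worth decoding. In the mid-block FeedForward, GEGLU's proj expands 512 -> 4096 and then splits the result into a 2048-dim value and a 2048-dim gate, which is why the following Linear maps 2048 -> 512. In each DiTBlock2, adaLN_modulation maps the 1024-dim conditioning vector to 6144 = 6 x 1024 values: shift, scale, and gate for the attention branch, and the same three for the MLP branch. A sketch of how a DiT-style block consumes those six chunks, following the public DiT reference code (the class below is illustrative, not this repo's DiTBlock2; plain MultiheadAttention stands in for MemEffAttention):

    import torch
    import torch.nn as nn

    class AdaLNBlockSketch(nn.Module):
        def __init__(self, dim=1024, heads=16):
            super().__init__()
            self.norm1 = nn.LayerNorm(dim, eps=1e-6, elementwise_affine=False)
            self.norm2 = nn.LayerNorm(dim, eps=1e-6, elementwise_affine=False)
            self.attn = nn.MultiheadAttention(dim, heads, batch_first=True)
            self.mlp = nn.Sequential(nn.Linear(dim, 4 * dim), nn.GELU(), nn.Linear(4 * dim, dim))
            self.adaLN_modulation = nn.Sequential(nn.SiLU(), nn.Linear(dim, 6 * dim))

        def forward(self, x, c):
            # x: (B, N, dim) tokens; c: (B, dim) conditioning vector.
            shift1, scale1, gate1, shift2, scale2, gate2 = \
                self.adaLN_modulation(c)[:, None].chunk(6, dim=-1)
            h = self.norm1(x) * (1 + scale1) + shift1            # modulated pre-norm
            x = x + gate1 * self.attn(h, h, h, need_weights=False)[0]
            h = self.norm2(x) * (1 + scale2) + shift2
            return x + gate2 * self.mlp(h)

In the DiT paper the final Linear of adaLN_modulation is zero-initialized ("adaLN-Zero") so every block starts as the identity; whether this checkpoint keeps that initialization is not visible from the printout.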
create dataset
joint_denoise_rec_model enables AMP to accelerate training
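The AMP message refers to PyTorch automatic mixed precision. A minimal sketch of the usual autocast + GradScaler pattern, with a generic model, optimizer, and loss standing in for this repo's actual training loop:

    import torch

    scaler = torch.cuda.amp.GradScaler()

    def train_step(model, optimizer, batch, loss_fn):
        optimizer.zero_grad(set_to_none=True)
        # Forward pass runs eligible ops in float16; weights stay float32.
        with torch.autocast(device_type="cuda", dtype=torch.float16):
            loss = loss_fn(model(batch))
        scaler.scale(loss).backward()  # scale up to avoid fp16 gradient underflow
        scaler.step(optimizer)         # unscales grads; skips the step on inf/nan
        scaler.update()
        return loss.detach()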
mark joint_denoise_rec_model loading
loading model from huggingface: yslan/LN3Diff/checkpoints/objaverse/objaverse-dit/i23d/model_joint_denoise_rec_model2990000.safetensors...
mark joint_denoise_rec_model loading finished
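The checkpoint named in the log lives in the yslan/LN3Diff Hugging Face repo. A sketch of fetching and reading it with huggingface_hub and safetensors (the download call mirrors the path in the log; mapping the weights onto a model is an assumption, since it needs this repo's AE/DiT classes):

    from huggingface_hub import hf_hub_download
    from safetensors.torch import load_file

    ckpt_path = hf_hub_download(
        repo_id="yslan/LN3Diff",
        filename="checkpoints/objaverse/objaverse-dit/i23d/"
                 "model_joint_denoise_rec_model2990000.safetensors",
    )
    state_dict = load_file(ckpt_path)    # dict[str, torch.Tensor], on CPU
    # model.load_state_dict(state_dict)  # hypothetical: requires the repo's model class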