# Text-to-3D
# image-to-3d
# code / taming-transformers / configs / imagenet_vqgan.yaml
# Chao Xu
# add taming
# 169a228
# raw
# history blame
# 949 Bytes
# Model section: a `target` dotted path plus a `params` mapping of settings
# (presumably passed to the target as constructor arguments -- confirm
# against the code that loads this config).
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped; verify it against the consumer's schema.
model:
  base_learning_rate: 4.5e-6
  target: taming.models.vqgan.VQModel
  params:
    embed_dim: 256
    n_embed: 1024

    # Settings grouped under `ddconfig`; z_channels matches embed_dim (256).
    ddconfig:
      double_z: false
      z_channels: 256
      resolution: 256
      in_channels: 3
      out_ch: 3
      ch: 128
      ch_mult: [1, 1, 2, 2, 4]  # num_down = len(ch_mult)-1
      num_res_blocks: 2
      attn_resolutions: [16]
      dropout: 0.0

    # Loss settings: its own `target` path with a nested `params` mapping.
    lossconfig:
      target: taming.modules.losses.vqperceptual.VQLPIPSWithDiscriminator
      params:
        disc_conditional: false
        disc_in_channels: 3
        disc_start: 250001
        disc_weight: 0.8
        codebook_weight: 1.0
# Data section: a data-module `target` whose `params` carry batch/worker
# settings plus one dataset entry each for `train` and `validation`.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped; verify it against the consumer's schema.
data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 12
    num_workers: 24

    # Both dataset entries use the same `config.size` value (256).
    train:
      target: taming.data.imagenet.ImageNetTrain
      params:
        config:
          size: 256

    validation:
      target: taming.data.imagenet.ImageNetValidation
      params:
        config:
          size: 256