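# Training config for a latent diffusion model that generates a target view
# (image_target) conditioned on a source view (image_cond) and, apparently,
# a relative camera pose (see conditioning_config), trained on a WebDataset
# mixture of co3d, re10k, and acid.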
model:
  base_learning_rate: 0.0001
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.00085
    linear_end: 0.012
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: image_target
    cond_stage_key: image_cond
    image_size: 32
    channels: 4
    cond_stage_trainable: false
    conditioning_key: hybrid
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
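    # Camera conditioning. "7dof_quantile_scale" presumably denotes a 6-DoF
    # relative pose plus a scene-scale term normalized via depth quantiles
    # (hence depth_model_name: midas, used only to estimate scale);
    # embedding_dim: 19 is the size of the resulting pose embedding.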
    conditioning_config:
      params:
        mode: 7dof_quantile_scale
        embedding_dim: 19
        depth_model_name: midas
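    # LR schedule: linear warm-up from 1.0e-06x to 1.0x of the base LR over
    # the first 100 steps, then held flat (f_max = f_min = 1.0 and the single
    # cycle length is effectively infinite).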
    scheduler_config:
      target: ldm.lr_scheduler.LambdaLinearScheduler
      params:
        warm_up_steps:
        - 100
        cycle_lengths:
        - 10000000000000
        f_start:
        - 1.0e-06
        f_max:
        - 1.0
        f_min:
        - 1.0
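    # Denoising UNet. in_channels: 8 = 4 noisy latent channels + 4 channels
    # of the VAE-encoded conditioning view concatenated at the input (the
    # "hybrid" conditioning key above means concat plus cross-attention);
    # context_dim: 768 matches the CLIP image embedding width.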
    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 32
        in_channels: 8
        out_channels: 4
        model_channels: 320
        attention_resolutions:
        - 4
        - 2
        - 1
        num_res_blocks: 2
        channel_mult:
        - 1
        - 2
        - 4
        - 4
        num_heads: 8
        use_spatial_transformer: true
        transformer_depth: 1
        context_dim: 768
        use_checkpoint: true
        legacy: false
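    # Validation-time sampling: 100 DDIM steps with eta 1.0 (stochastic DDIM);
    # scale: 3.0 is presumably the classifier-free guidance scale.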
    eval_config:
      params:
        scale: 3.0
        ddim_steps: 100
        ddim_eta: 1.0
        lpips_model_path: null
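    # Frozen KL autoencoder in the Stable Diffusion layout (f=8: 256px images
    # map to 32x32x4 latents, matching image_size and channels above).
    # lossconfig is Identity because the autoencoder is not trained here.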
    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity
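    # Image conditioning branch: a frozen CLIP image encoder supplies the
    # 768-dim cross-attention context (cond_stage_trainable: false above).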
    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenCLIPImageEmbedder
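# Data: three WebDataset sources mixed at roughly equal sampling probability
# (0.34 / 0.33 / 0.33).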
data:
  target: ldm.data.simple.WDSGenericDataModule
  params:
    train_config:
      batch_size: 48
      num_workers: 6
      shuffle_buffer_size: 500
      prefetch_factor: 4
      dataset_config_1:
        dataset_n_shards: 127
        dataset_name: co3d
        views_per_scene: 100
        dataset_n_scenes: 18432
        rate: 0.025
        probability: 0.34
        dataset_url: null
      dataset_config_2:
        dataset_n_shards: 127
        dataset_name: re10k
        views_per_scene: 200
        dataset_n_scenes: 65280
        probability: 0.33
        rate: 0.025
        dataset_url: null
      dataset_config_3:
        dataset_n_shards: 127
        dataset_name: acid
        views_per_scene: 100
        dataset_n_scenes: 12032
        probability: 0.33
        rate: 0.025
        dataset_url: null
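    # Validation draws single-sample batches from a 150-scene co3d split.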
    val_config:
      batch_size: 1
      subsample: 1.0
      scene_scale: 1.0
      dataset_n_shards: 1
      dataset_name: co3d
      dataset_n_scenes: 150
      num_workers: 1
      shuffle_buffer_size: 20
      rate: 0.1
      dataset_url: null
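# The "--" blocks below appear to be launcher/CLI overrides recorded alongside
# the base config; most restate base values. New here: gradient accumulation
# over 4 batches (effective batch size 4 x 48 = 192 per optimizer step and
# process) and a checkpoint every 2500 training steps.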
--lightning:
  trainer:
    accumulate_grad_batches: 4
  modelcheckpoint:
    params:
      every_n_train_steps: 2500
--data:
  params:
    train_config:
      batch_size: 48
    val_config:
      batch_size: 1
--model:
  params:
    conditioning_config:
      params:
        mode: 7dof_quantile_scale
        embedding_dim: 19
    eval_config:
      params:
        ddim_steps: 100
  base_learning_rate: 0.0001
--args:
  finetune_from: null
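# A config in this layout is typically consumed with OmegaConf plus the ldm
# helper instantiate_from_config (a sketch, assuming the standard
# latent-diffusion entry points; the filename is a placeholder):
#   from omegaconf import OmegaConf
#   from ldm.util import instantiate_from_config
#   cfg = OmegaConf.load("this_file.yaml")
#   model = instantiate_from_config(cfg.model)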