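# Page header captured alongside this file (not part of the config itself):
#   instruct-pix2pix -- Sleeping
#   path:   instruct-pix2pix/configs/latent-diffusion/lsun_bedrooms-ldm-vq-4.yaml
#   author: ablattmann
#   commit: ec3a273 (almost 3 years ago) -- add configs for training
#           unconditional/class-conditional ldms
#   size:   2.02 kB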

	model:
	base_learning_rate: 2.0e-06
	target: ldm.models.diffusion.ddpm.LatentDiffusion
	params:
	linear_start: 0.0015
	linear_end: 0.0195
	num_timesteps_cond: 1
	log_every_t: 200
	timesteps: 1000
	first_stage_key: image
	image_size: 64
	channels: 3
	monitor: val/loss_simple_ema
	unet_config:
	target: ldm.modules.diffusionmodules.openaimodel.UNetModel
	params:
	image_size: 64
	in_channels: 3
	out_channels: 3
	model_channels: 224
	attention_resolutions:
	# note: this isn\t actually the resolution but
	# the downsampling factor, i.e. this corresnponds to
	# attention on spatial resolution 8,16,32, as the
	# spatial reolution of the latents is 64 for f4
	- 8
	- 4
	- 2
	num_res_blocks: 2
	channel_mult:
	- 1
	- 2
	- 3
	- 4
	num_head_channels: 32
	first_stage_config:
	target: ldm.models.autoencoder.VQModelInterface
	params:
	ckpt_path: configs/first_stage_models/vq-f4/model.yaml
	embed_dim: 3
	n_embed: 8192
	ddconfig:
	double_z: false
	z_channels: 3
	resolution: 256
	in_channels: 3
	out_ch: 3
	ch: 128
	ch_mult:
	- 1
	- 2
	- 4
	num_res_blocks: 2
	attn_resolutions: []
	dropout: 0.0
	lossconfig:
	target: torch.nn.Identity
	cond_stage_config: __is_unconditional__
	data:
	target: main.DataModuleFromConfig
	params:
	batch_size: 48
	num_workers: 5
	wrap: false
	train:
	target: ldm.data.lsun.LSUNBedroomsTrain
	params:
	size: 256
	validation:
	target: ldm.data.lsun.LSUNBedroomsValidation
	params:
	size: 256


	lightning:
	callbacks:
	image_logger:
	target: main.ImageLogger
	params:
	batch_frequency: 5000
	max_images: 8
	increase_log_steps: False

	trainer:
	benchmark: True