# Hugging Face Space: fffiloni/X-Portrait (status when captured: Sleeping)
# File: X-Portrait/config/cldm_v15_appearance_pose_local_mm.yaml (3.83 kB)
# Last change: "Migrated from GitHub" by fffiloni, commit bfed184 (verified),
# 23 days before capture.

	model:
	target: model_lib.ControlNet.cldm.cldm.ControlLDMReferenceOnly_Temporal_Pose_Local
	params:
	linear_start: 0.00085
	linear_end: 0.0120
	num_timesteps_cond: 1
	log_every_t: 200
	timesteps: 1000
	first_stage_key: "jpg"
	cond_stage_key: "txt"
	control_key: "hint"
	image_size: 64
	channels: 4
	cond_stage_trainable: false
	conditioning_key: crossattn
	monitor: val/loss_simple_ema
	scale_factor: 0.18215
	use_ema: False
	only_mid_control: False

	appearance_control_stage_config:
	target: model_lib.ControlNet.cldm.cldm.ControlNetReferenceOnly
	params:
	image_size: 32 # unused
	in_channels: 4
	hint_channels: 3
	out_channels: 4
	model_channels: 320
	attention_resolutions: [ 4, 2, 1 ]
	num_res_blocks: 2
	channel_mult: [ 1, 2, 4, 4 ]
	num_heads: 8
	use_spatial_transformer: True
	transformer_depth: 1
	context_dim: 768
	use_checkpoint: True
	legacy: False

	pose_control_stage_config:
	target: model_lib.ControlNet.cldm.cldm.ControlNet
	params:
	image_size: 32 # unused
	in_channels: 4
	hint_channels: 3
	model_channels: 320
	attention_resolutions: [ 4, 2, 1 ]
	num_res_blocks: 2
	channel_mult: [ 1, 2, 4, 4 ]
	num_heads: 8
	use_spatial_transformer: True
	transformer_depth: 1
	context_dim: 768
	use_checkpoint: True
	legacy: False

	local_pose_control_stage_config:
	target: model_lib.ControlNet.cldm.cldm.ControlNet
	params:
	image_size: 32 # unused
	in_channels: 4
	hint_channels: 3
	model_channels: 320
	attention_resolutions: [ 4, 2, 1 ]
	num_res_blocks: 2
	channel_mult: [ 1, 2, 4, 4 ]
	num_heads: 8
	use_spatial_transformer: True
	transformer_depth: 1
	context_dim: 768
	use_checkpoint: True
	legacy: False

	unet_config:
	target: model_lib.ControlNet.cldm.cldm.ControlledUnetModelAttn_Temporal_Pose_Local
	params:
	image_size: 32 # unused
	in_channels: 4
	out_channels: 4
	model_channels: 320
	attention_resolutions: [ 4, 2, 1 ]
	num_res_blocks: 2
	channel_mult: [ 1, 2, 4, 4 ]
	num_heads: 8
	use_spatial_transformer: True
	transformer_depth: 1
	context_dim: 768
	use_checkpoint: True
	legacy: False

	unet_additional_kwargs:
	use_motion_module : true
	motion_module_resolutions : [ 1,2,4,8 ]
	unet_use_cross_frame_attention : false
	unet_use_temporal_attention : false

	motion_module_type: Vanilla
	motion_module_kwargs:
	num_attention_heads : 8
	num_transformer_block : 1
	attention_block_types : [ "Temporal_Self", "Temporal_Self" ]
	temporal_position_encoding : true
	temporal_position_encoding_max_len : 24
	temporal_attention_dim_div : 1
	zero_initialize : true

	first_stage_config:
	target: model_lib.ControlNet.ldm.models.autoencoder.AutoencoderKL
	params:
	embed_dim: 4
	monitor: val/rec_loss
	ddconfig:
	double_z: true
	z_channels: 4
	resolution: 256
	in_channels: 3
	out_ch: 3
	ch: 128
	ch_mult:
	- 1
	- 2
	- 4
	- 4
	num_res_blocks: 2
	attn_resolutions: []
	dropout: 0.0
	lossconfig:
	target: torch.nn.Identity

	cond_stage_config:
	target: model_lib.ControlNet.ldm.modules.encoders.modules.FrozenCLIPEmbedder