# Configuration for BOOXEL.models.BOOXEL_model.BOOXELModel.
# Every `target` is a dotted import path; the sibling `params` mapping holds
# the values configured for that target.
model:
  target: BOOXEL.models.BOOXEL_model.BOOXELModel
  params:
    ae_dtype: bf16
    diffusion_dtype: fp16
    scale_factor: 0.13025
    disable_first_stage_autocast: true
    network_wrapper: sgm.modules.diffusionmodules.wrappers.ControlWrapper

    denoiser_config:
      target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiserWithControl
      params:
        num_idx: 1000
        weighting_config:
          target: sgm.modules.diffusionmodules.denoiser_weighting.EpsWeighting
        scaling_config:
          target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
        discretization_config:
          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization

    # Shares its backbone hyper-parameters with network_config below; keep
    # the two in sync when editing either one.
    control_stage_config:
      target: BOOXEL.modules.BOOXEL_v0.GLVControl
      params:
        adm_in_channels: 2816
        num_classes: sequential
        use_checkpoint: true
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 4]
        num_head_channels: 64
        use_spatial_transformer: true
        use_linear_in_transformer: true
        # Note: the first entry is unused (attn_res starts at 2);
        # 32, 16, 8 --> 64, 32, 16
        transformer_depth: [1, 2, 10]
        # Disabled alternative:
        # transformer_depth: [1, 1, 4]
        context_dim: 2048
        spatial_transformer_attn_type: softmax-xformers
        legacy: false
        input_upscale: 1

    network_config:
      target: BOOXEL.modules.BOOXEL_v0.LightGLVUNet
      params:
        mode: XL-base
        project_type: ZeroSFT
        project_channel_scale: 2
        adm_in_channels: 2816
        num_classes: sequential
        use_checkpoint: true
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2]
        num_res_blocks: 2
        channel_mult: [1, 2, 4]
        num_head_channels: 64
        use_spatial_transformer: true
        use_linear_in_transformer: true
        # Note: the first entry is unused (attn_res starts at 2);
        # 32, 16, 8 --> 64, 32, 16
        transformer_depth: [1, 2, 10]
        context_dim: 2048
        spatial_transformer_attn_type: softmax-xformers
        legacy: false

    conditioner_config:
      target: sgm.modules.GeneralConditionerWithControl
      params:
        emb_models:
          # Cross-attention conditioning
          - is_trainable: false
            input_key: txt
            target: sgm.modules.encoders.modules.FrozenCLIPEmbedder
            params:
              layer: hidden
              layer_idx: 11
          # Cross-attention and vector conditioning
          - is_trainable: false
            input_key: txt
            target: sgm.modules.encoders.modules.FrozenOpenCLIPEmbedder2
            params:
              arch: ViT-bigG-14
              version: laion2b_s39b_b160k
              freeze: true
              layer: penultimate
              always_return_pooled: true
              legacy: false
          # Vector conditioning
          - is_trainable: false
            input_key: original_size_as_tuple
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two
          # Vector conditioning
          - is_trainable: false
            input_key: crop_coords_top_left
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two
          # Vector conditioning
          - is_trainable: false
            input_key: target_size_as_tuple
            target: sgm.modules.encoders.modules.ConcatTimestepEmbedderND
            params:
              outdim: 256  # multiplied by two

    first_stage_config:
      target: sgm.models.autoencoder.AutoencoderKLInferenceWrapper
      params:
        ckpt_path: null
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          attn_type: vanilla-xformers
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult: [1, 2, 4, 4]
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    sampler_config:
      target: sgm.modules.diffusionmodules.sampling.RestoreDPMPP2MSampler
      params:
        num_steps: 100
        restore_cfg: 4.0
        s_churn: 0
        s_noise: 1.003
        discretization_config:
          target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization
        guider_config:
          target: sgm.modules.diffusionmodules.guiders.LinearCFG
          params:
            scale: 7.5
            scale_min: 4.0

    # NOTE(review): presumably the default positive (p_p) and negative (n_p)
    # prompt texts, nested under model.params — confirm both the meaning and
    # the nesting against BOOXELModel. These are runtime strings: do not
    # translate or reformat them.
    p_p: '电影级,高对比度,高度精细,使用哈苏相机拍摄,超精细照片,逼真的最大细节,32K,调色,超高清,极致的细节,皮肤毛孔细节,超清晰度,完美无变形。'
    n_p: '绘画,油画,插图,绘图,艺术,素描,动漫,卡通,CG 风格,3D 渲染,虚幻引擎,模糊,混色,不清晰,怪异纹理,丑陋,肮脏,凌乱,质量最差,质量低,框架,水印,签名,JPEG 伪影,变形,低分辨率,过度平滑'

# Checkpoint files. The paths are relative; the directory they are resolved
# against is decided by the loading code, which is not visible here.
SDXL_CKPT: RunDiffusion_Juggernaut-XL-Lightning/Juggernaut_RunDiffusionPhoto2_Lightning_4Steps.safetensors
BOOXEL_CKPT_F: yanranxiaoxi_booxel/BOOXEL-v0.F.ckpt
BOOXEL_CKPT_Q: yanranxiaoxi_booxel/BOOXEL-v0.Q.ckpt
BOOXEL_CKPT: null

default_setting:
  s_cfg_Quality: 2.0
  spt_linear_CFG_Quality: 2.0
  s_cfg_Fidelity: 1.5
  spt_linear_CFG_Fidelity: 1.5
  edm_steps: 8