Shufan Li
init
ef579dd
raw
history blame contribute delete
821 Bytes
{
"_class_name": "SD3Transformer2DModelWithAudioHQ",
"_diffusers_version": "0.30.0.dev0",
"_name_or_path": "/localhome/data/ckpts/shared/stable-diffusion-3.5-medium",
"add_audio": true,
"add_clip": false,
"attention_head_dim": 64,
"audio_input_dim": 8,
"caption_projection_dim": 1536,
"decoder_config": "",
"drop_audio": false,
"drop_image": false,
"drop_text": false,
"dual_attention_layers": [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12
],
"image_bind": false,
"in_channels": 16,
"joint_attention_dim": 4096,
"num_attention_heads": 24,
"num_layers": 24,
"out_channels": 16,
"patch_size": 2,
"pooled_projection_dim": 2048,
"pos_embed_max_size": 384,
"qk_norm": "rms_norm",
"sample_size": 128,
"use_audio_mae": false
}