{
  "_class_name": "SD3Transformer2DModelWithAudioHQ",
  "_diffusers_version": "0.30.0.dev0",
  "_name_or_path": "/localhome/data/ckpts/shared/stable-diffusion-3.5-medium",
  "add_audio": true,
  "add_clip": false,
  "attention_head_dim": 64,
  "audio_input_dim": 8,
  "caption_projection_dim": 1536,
  "decoder_config": "",
  "drop_audio": false,
  "drop_image": false,
  "drop_text": false,
  "dual_attention_layers": [
    0,
    1,
    2,
    3,
    4,
    5,
    6,
    7,
    8,
    9,
    10,
    11,
    12
  ],
  "image_bind": false,
  "in_channels": 16,
  "joint_attention_dim": 4096,
  "num_attention_heads": 24,
  "num_layers": 24,
  "out_channels": 16,
  "patch_size": 2,
  "pooled_projection_dim": 2048,
  "pos_embed_max_size": 384,
  "qk_norm": "rms_norm",
  "sample_size": 128,
  "use_audio_mae": false
}