Update configs
Browse files
- diffusion_net/config.json +2 -24
- reference_net/config.json +0 -1
diffusion_net/config.json
CHANGED
@@ -2,7 +2,6 @@
|
|
2 |
"_center_input_sample": false,
|
3 |
"_class_name": "UNet3DConditionModel",
|
4 |
"_diffusers_version": "0.31.0",
|
5 |
-
"_landmark_net": false,
|
6 |
"_out_channels": 4,
|
7 |
"act_fn": "silu",
|
8 |
"addition_embed_type": null,
|
@@ -32,7 +31,6 @@
|
|
32 |
"dropout": 0.0,
|
33 |
"dual_cross_attention": false,
|
34 |
"emo_drop_rate": 0.05,
|
35 |
-
"emotion_signal": true,
|
36 |
"encoder_hid_dim": null,
|
37 |
"encoder_hid_dim_type": null,
|
38 |
"flip_sin_to_cos": true,
|
@@ -42,7 +40,6 @@
|
|
42 |
"mid_block_only_cross_attention": null,
|
43 |
"mid_block_scale_factor": 1,
|
44 |
"mid_block_type": "UNetMidBlock3DCrossAttn",
|
45 |
-
"motion_module_decoder_only": false,
|
46 |
"motion_module_kwargs": {
|
47 |
"attention_block_types": [
|
48 |
"Temporal_Self",
|
@@ -52,17 +49,14 @@
|
|
52 |
"num_transformer_block": 1,
|
53 |
"temporal_attention_dim_div": 1,
|
54 |
"temporal_position_encoding": true,
|
55 |
-
"temporal_position_encoding_max_len": 32,
|
56 |
-
"use_linear_attn": true
|
57 |
},
|
58 |
-
"motion_module_mid_block": true,
|
59 |
"motion_module_resolutions": [
|
60 |
1,
|
61 |
2,
|
62 |
4,
|
63 |
8
|
64 |
],
|
65 |
-
"motion_module_type": "MemoryLinearAttn",
|
66 |
"norm_eps": 1e-05,
|
67 |
"norm_num_groups": 32,
|
68 |
"num_attention_heads": null,
|
@@ -73,24 +67,12 @@
|
|
73 |
"resnet_time_scale_shift": "default",
|
74 |
"reverse_transformer_layers_per_block": null,
|
75 |
"sample_size": 64,
|
76 |
-
"stack_enable_blocks_depth": [
|
77 |
-
0,
|
78 |
-
1,
|
79 |
-
2,
|
80 |
-
3
|
81 |
-
],
|
82 |
-
"stack_enable_blocks_name": [
|
83 |
-
"up",
|
84 |
-
"down",
|
85 |
-
"mid"
|
86 |
-
],
|
87 |
"time_cond_proj_dim": null,
|
88 |
"time_embedding_act_fn": null,
|
89 |
"time_embedding_dim": null,
|
90 |
"time_embedding_type": "positional",
|
91 |
"timestep_post_act": null,
|
92 |
"transformer_layers_per_block": 1,
|
93 |
-
"two_branches_atten": true,
|
94 |
"unet_use_cross_frame_attention": false,
|
95 |
"unet_use_temporal_attention": false,
|
96 |
"up_block_types": [
|
@@ -100,10 +82,6 @@
|
|
100 |
"CrossAttnUpBlock3D"
|
101 |
],
|
102 |
"upcast_attention": false,
|
103 |
-
"use_audio_module": true,
|
104 |
-
"use_face_masks": false,
|
105 |
"use_inflated_groupnorm": true,
|
106 |
-
"use_linear_projection": false,
|
107 |
-
"use_motion_module": true,
|
108 |
-
"use_past_frames": true
|
109 |
}
|
|
|
2 |
"_center_input_sample": false,
|
3 |
"_class_name": "UNet3DConditionModel",
|
4 |
"_diffusers_version": "0.31.0",
|
|
|
5 |
"_out_channels": 4,
|
6 |
"act_fn": "silu",
|
7 |
"addition_embed_type": null,
|
|
|
31 |
"dropout": 0.0,
|
32 |
"dual_cross_attention": false,
|
33 |
"emo_drop_rate": 0.05,
|
|
|
34 |
"encoder_hid_dim": null,
|
35 |
"encoder_hid_dim_type": null,
|
36 |
"flip_sin_to_cos": true,
|
|
|
40 |
"mid_block_only_cross_attention": null,
|
41 |
"mid_block_scale_factor": 1,
|
42 |
"mid_block_type": "UNetMidBlock3DCrossAttn",
|
|
|
43 |
"motion_module_kwargs": {
|
44 |
"attention_block_types": [
|
45 |
"Temporal_Self",
|
|
|
49 |
"num_transformer_block": 1,
|
50 |
"temporal_attention_dim_div": 1,
|
51 |
"temporal_position_encoding": true,
|
52 |
+
"temporal_position_encoding_max_len": 32
|
|
|
53 |
},
|
|
|
54 |
"motion_module_resolutions": [
|
55 |
1,
|
56 |
2,
|
57 |
4,
|
58 |
8
|
59 |
],
|
|
|
60 |
"norm_eps": 1e-05,
|
61 |
"norm_num_groups": 32,
|
62 |
"num_attention_heads": null,
|
|
|
67 |
"resnet_time_scale_shift": "default",
|
68 |
"reverse_transformer_layers_per_block": null,
|
69 |
"sample_size": 64,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
"time_cond_proj_dim": null,
|
71 |
"time_embedding_act_fn": null,
|
72 |
"time_embedding_dim": null,
|
73 |
"time_embedding_type": "positional",
|
74 |
"timestep_post_act": null,
|
75 |
"transformer_layers_per_block": 1,
|
|
|
76 |
"unet_use_cross_frame_attention": false,
|
77 |
"unet_use_temporal_attention": false,
|
78 |
"up_block_types": [
|
|
|
82 |
"CrossAttnUpBlock3D"
|
83 |
],
|
84 |
"upcast_attention": false,
|
|
|
|
|
85 |
"use_inflated_groupnorm": true,
|
86 |
+
"use_linear_projection": false
|
|
|
|
|
87 |
}
|
reference_net/config.json
CHANGED
@@ -2,7 +2,6 @@
|
|
2 |
"_center_input_sample": false,
|
3 |
"_class_name": "UNet2DConditionModel",
|
4 |
"_diffusers_version": "0.31.0",
|
5 |
-
"_landmark_net": false,
|
6 |
"_out_channels": 4,
|
7 |
"act_fn": "silu",
|
8 |
"addition_embed_type": null,
|
|
|
2 |
"_center_input_sample": false,
|
3 |
"_class_name": "UNet2DConditionModel",
|
4 |
"_diffusers_version": "0.31.0",
|
|
|
5 |
"_out_channels": 4,
|
6 |
"act_fn": "silu",
|
7 |
"addition_embed_type": null,
|