benjamin-paine committed on
Commit
b427ef8
·
verified ·
1 Parent(s): 29db607

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +193 -0
config.json ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vae": {
3
+ "_class_name": "AutoencoderKL",
4
+ "_diffusers_version": "0.26.1",
5
+ "_name_or_path": "damo-vilab/text-to-video-ms-1.7b",
6
+ "act_fn": "silu",
7
+ "block_out_channels": [
8
+ 128,
9
+ 256,
10
+ 512,
11
+ 512
12
+ ],
13
+ "down_block_types": [
14
+ "DownEncoderBlock2D",
15
+ "DownEncoderBlock2D",
16
+ "DownEncoderBlock2D",
17
+ "DownEncoderBlock2D"
18
+ ],
19
+ "force_upcast": true,
20
+ "in_channels": 3,
21
+ "latent_channels": 4,
22
+ "layers_per_block": 2,
23
+ "norm_num_groups": 32,
24
+ "out_channels": 3,
25
+ "sample_size": 512,
26
+ "scaling_factor": 0.18215,
27
+ "up_block_types": [
28
+ "UpDecoderBlock2D",
29
+ "UpDecoderBlock2D",
30
+ "UpDecoderBlock2D",
31
+ "UpDecoderBlock2D"
32
+ ]
33
+ },
34
+ "unet": {
35
+ "_class_name": "UNet3DConditionModel",
36
+ "_diffusers_version": "0.26.1",
37
+ "_name_or_path": "damo-vilab/text-to-video-ms-1.7b",
38
+ "act_fn": "silu",
39
+ "attention_head_dim": 64,
40
+ "block_out_channels": [
41
+ 320,
42
+ 640,
43
+ 1280,
44
+ 1280
45
+ ],
46
+ "concat": false,
47
+ "cross_attention_dim": 1024,
48
+ "down_block_types": [
49
+ "CrossAttnDownBlock3D",
50
+ "CrossAttnDownBlock3D",
51
+ "CrossAttnDownBlock3D",
52
+ "DownBlock3D"
53
+ ],
54
+ "downsample_padding": 1,
55
+ "in_channels": 4,
56
+ "layers_per_block": 2,
57
+ "merging_mode": "attention_cross_attention",
58
+ "mid_block_scale_factor": 1,
59
+ "norm_eps": 0.00001,
60
+ "norm_num_groups": 32,
61
+ "out_channels": 4,
62
+ "sample_size": 32,
63
+ "up_block_types": [
64
+ "UpBlock3D",
65
+ "CrossAttnUpBlock3D",
66
+ "CrossAttnUpBlock3D",
67
+ "CrossAttnUpBlock3D"
68
+ ],
69
+ "use_channel_expansion": false,
70
+ "use_fps_conditioning": false,
71
+ "use_image_embedding": true,
72
+ "use_image_tokens": true,
73
+ "use_repeat_context_img": true
74
+ },
75
+ "resampler": {
76
+ "_class_name": "ImageEmbeddingContextResampler",
77
+ "_diffusers_version": "0.26.1",
78
+ "cross_attention_dim": 1024,
79
+ "expansion_factor": 16,
80
+ "inner_dim": 1280
81
+ },
82
+ "controlnet": {
83
+ "_class_name": "ControlNetModel",
84
+ "_diffusers_version": "0.26.1",
85
+ "act_fn": "silu",
86
+ "attention_head_dim": 64,
87
+ "block_out_channels": [
88
+ 320,
89
+ 640,
90
+ 1280,
91
+ 1280
92
+ ],
93
+ "class_embed_type": null,
94
+ "conditioning_embedding_out_channels": [
95
+ 32,
96
+ 96,
97
+ 256,
98
+ 512
99
+ ],
100
+ "controlnet_conditioning_channel_order": "rgb",
101
+ "cross_attention_dim": 1024,
102
+ "down_block_types": [
103
+ "CrossAttnDownBlock3D",
104
+ "CrossAttnDownBlock3D",
105
+ "CrossAttnDownBlock3D",
106
+ "DownBlock3D"
107
+ ],
108
+ "downsample_controlnet_cond": true,
109
+ "downsample_padding": 1,
110
+ "flip_sin_to_cos": true,
111
+ "frame_expansion": "none",
112
+ "freq_shift": 0,
113
+ "global_pool_conditions": false,
114
+ "in_channels": 4,
115
+ "layers_per_block": 2,
116
+ "merging_mode": "addition",
117
+ "mid_block_scale_factor": 1,
118
+ "norm_eps": 0.00001,
119
+ "norm_num_groups": 32,
120
+ "num_class_embeds": null,
121
+ "num_frames": 8,
122
+ "num_frames_conditioning": 8,
123
+ "num_tranformers": 1,
124
+ "only_cross_attention": false,
125
+ "projection_class_embeddings_input_dim": null,
126
+ "resnet_time_scale_shift": "default",
127
+ "upcast_attention": false,
128
+ "use_controlnet_mask": false,
129
+ "use_image_embedding": false,
130
+ "use_image_encoder_normalization": false,
131
+ "use_image_tokens": false,
132
+ "use_linear_projection": false,
133
+ "use_repeat_context_img": true,
134
+ "zero_conv_mode": "Identity"
135
+ },
136
+ "text_encoder": {
137
+ "_name_or_path": "damo-vilab/text-to-video-ms-1.7b",
138
+ "architectures": [
139
+ "CLIPTextModel"
140
+ ],
141
+ "attention_dropout": 0,
142
+ "bos_token_id": 0,
143
+ "dropout": 0,
144
+ "eos_token_id": 2,
145
+ "hidden_act": "gelu",
146
+ "hidden_size": 1024,
147
+ "initializer_factor": 1,
148
+ "initializer_range": 0.02,
149
+ "intermediate_size": 4096,
150
+ "layer_norm_eps": 0.00001,
151
+ "max_position_embeddings": 77,
152
+ "model_type": "clip_text_model",
153
+ "num_attention_heads": 16,
154
+ "num_hidden_layers": 23,
155
+ "pad_token_id": 1,
156
+ "projection_dim": 512,
157
+ "torch_dtype": "float32",
158
+ "transformers_version": "4.39.0",
159
+ "vocab_size": 49408
160
+ },
161
+ "tokenizer": {
162
+ "model": "ali-vilab/text-to-video-ms-1.7b",
163
+ "subfolder": "tokenizer"
164
+ },
165
+ "scheduler": {
166
+ "_class_name": "DDIMScheduler",
167
+ "_diffusers_version": "0.26.1",
168
+ "beta_end": 0.012,
169
+ "beta_schedule": "scaled_linear",
170
+ "beta_start": 0.00085,
171
+ "clip_sample": false,
172
+ "clip_sample_range": 1,
173
+ "dynamic_thresholding_ratio": 0.995,
174
+ "num_train_timesteps": 1000,
175
+ "prediction_type": "epsilon",
176
+ "rescale_betas_zero_snr": false,
177
+ "sample_max_value": 1,
178
+ "set_alpha_to_one": false,
179
+ "skip_prk_steps": true,
180
+ "steps_offset": 1,
181
+ "thresholding": false,
182
+ "timestep_spacing": "leading",
183
+ "trained_betas": null
184
+ },
185
+ "num_frames": 16,
186
+ "num_frames_conditioning": 8,
187
+ "temp_attend_on_uncond_include_past": false,
188
+ "temp_attend_on_neighborhood_of_condition_frames": false,
189
+ "temporal_self_attention_mask_included_itself": false,
190
+ "temporal_self_attention_only_on_conditioning": false,
191
+ "spatial_attend_on_condition_frames": false,
192
+ "image_encoder_version": "laion2b_s32b_b79k"
193
+ }