{ "image_dim": 4, "image_size": [ 480, 768 ], "image_stride": 8, "text_token_dim": 2560, "text_token_len": 256, "video_mixer_rank": 24, "video_base_size": [ 16, 15, 24 ], "image_base_size": [ 30, 48 ], "arch": [ "vit_d16w1024", "vit_d32w1024", "mlp_d3w1280" ] }