MiniMax-VL-01 / config.json
{
  "architectures": [
    "MiniMaxVL01ForConditionalGeneration"
  ],
  "auto_map": {
    "AutoModelForCausalLM": "modeling_minimax_vl_01.MiniMaxVL01ForConditionalGeneration",
    "AutoConfig": "configuration_minimax_vl_01.MiniMaxVL01Config"
  },
  "ignore_index": -100,
  "image_grid_pinpoints": [
    [336, 336], [336, 672], [336, 1008], [336, 1344], [336, 1680], [336, 2016],
    [672, 336], [672, 672], [672, 1008], [672, 1344], [672, 1680], [672, 2016],
    [1008, 336], [1008, 672], [1008, 1008], [1008, 1344], [1008, 1680], [1008, 2016],
    [1344, 336], [1344, 672], [1344, 1008], [1344, 1344],
    [1680, 336], [1680, 672], [1680, 1008],
    [2016, 336], [2016, 672], [2016, 1008]
  ],
  "image_token_index": 200025,
  "model_type": "minimax_vl_01",
  "projector_hidden_act": "gelu",
  "text_config": {
    "architectures": [
      "MiniMaxText01ForCausalLM"
    ],
    "attn_type_list": [
      0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
      0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
      0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
      0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
      0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1
    ],
    "bos_token_id": null,
    "eos_token_id": null,
    "head_dim": 128,
    "hidden_size": 6144,
    "intermediate_size": 9216,
    "layernorm_full_attention_alpha": 3.5565588200778455,
    "layernorm_full_attention_beta": 1.0,
    "layernorm_linear_attention_alpha": 3.5565588200778455,
    "layernorm_linear_attention_beta": 1.0,
    "layernorm_mlp_alpha": 3.5565588200778455,
    "layernorm_mlp_beta": 1.0,
    "max_position_embeddings": 8192,
    "model_type": "minimax_text_01",
    "num_attention_heads": 64,
    "num_experts_per_tok": 2,
    "num_hidden_layers": 80,
    "num_key_value_heads": 8,
    "num_local_experts": 32,
    "postnorm": true,
    "rms_norm_eps": 1e-05,
    "rope_theta": 10000000,
    "rotary_dim": 64,
    "shared_intermediate_size": [0],
    "shared_moe_mode": "sigmoid",
    "vocab_size": 200064
  },
  "transformers_version": "4.42.3",
  "vision_config": {
    "auto_map": {
      "AutoModel": "modeling_clip.CLIPVisionModel"
    },
    "hidden_act": "gelu",
    "hidden_size": 1024,
    "image_size": 336,
    "intermediate_size": 4096,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 6144,
    "vocab_size": 32000
  },
  "torch_dtype": "bfloat16",
  "vision_feature_layer": -1,
  "vision_feature_select_strategy": "default"
}
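
A minimal loading sketch for this config. The repo id "MiniMaxAI/MiniMax-VL-01" is an assumption based on the file header; trust_remote_code=True is required because the auto_map entries above route the Auto classes to custom modeling files shipped with the repo rather than to built-in transformers classes.

from transformers import AutoConfig, AutoModelForCausalLM
import torch

# auto_map resolves AutoConfig to configuration_minimax_vl_01.MiniMaxVL01Config
# and AutoModelForCausalLM to modeling_minimax_vl_01.MiniMaxVL01ForConditionalGeneration.
# "MiniMaxAI/MiniMax-VL-01" is an assumed repo id, not confirmed by this file.
config = AutoConfig.from_pretrained("MiniMaxAI/MiniMax-VL-01", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "MiniMaxAI/MiniMax-VL-01",
    config=config,
    torch_dtype=torch.bfloat16,  # matches "torch_dtype": "bfloat16" above
    trust_remote_code=True,
)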
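
The 28 image_grid_pinpoints entries are candidate (height, width) canvases for LLaVA-NeXT-style "anyres" tiling; every dimension is a multiple of the vision tower's 336 px base resolution. A sketch of the usual selection rule (keep as much of the original image's detail as possible, then minimize padding), mirroring the select_best_resolution helper used by transformers' LLaVA-NeXT image processing; MiniMax-VL-01's own processor may differ in detail:

def select_best_resolution(original_size, possible_resolutions):
    """Pick the (height, width) canvas that best fits an image of original_size."""
    orig_h, orig_w = original_size
    best_fit = None
    max_effective = 0
    min_wasted = float("inf")
    for height, width in possible_resolutions:
        # Scale factor that fits the image inside this canvas without cropping.
        scale = min(width / orig_w, height / orig_h)
        down_w, down_h = int(orig_w * scale), int(orig_h * scale)
        # Resolution actually retained, capped at the original image's area.
        effective = min(down_w * down_h, orig_w * orig_h)
        wasted = (width * height) - effective
        if effective > max_effective or (effective == max_effective and wasted < min_wasted):
            max_effective = effective
            min_wasted = wasted
            best_fit = (height, width)
    return best_fit

# e.g. a 900x1200 photo maps onto the 1008x1344 canvas:
print(select_best_resolution((900, 1200), [[336, 336], [672, 672], [1008, 1344]]))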
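
text_config.attn_type_list has one entry per hidden layer (80 total). Following the MiniMax-01 technical report, 0 appears to mark a lightning (linear) attention layer and 1 a full softmax attention layer, i.e. one softmax layer after every seven linear ones. A quick sketch reconstructing and checking the pattern:

# Assumed semantics: 0 = lightning (linear) attention, 1 = softmax attention.
attn_type_list = [1 if (i + 1) % 8 == 0 else 0 for i in range(80)]
assert sum(attn_type_list) == 10                      # 10 softmax-attention layers
assert attn_type_list[7] == attn_type_list[79] == 1   # every 8th layer is softmax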
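
vision_config describes a 24-layer CLIP ViT-L/14 tower at 336 px, so the visual token count per tile follows directly from image_size and patch_size, with the projector mapping each token into the 6144-dim text embedding space:

patches_per_side = 336 // 14              # image_size / patch_size = 24
tokens_per_tile = patches_per_side ** 2   # 576 visual tokens per 336x336 tile
vision_hidden = 1024                      # vision_config.hidden_size
projected_dim = 6144                      # projection_dim == text_config.hidden_size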