numen-tech's picture
Add weights
1483955
{
"metadata": {
"ParamSize": 98,
"ParamBytes": 2471628800.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 525336576,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
128256,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 525336576,
"byteOffset": 0
}
],
"md5sum": "96ecea76fb745a56ee7bc9a00606f3da"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "45ab6a597571f55d11310ad502ecd11c"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "72068393e1767a06fd5d059251fefc72"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6fc65384fd5a6fb03c461e1022ea887f"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "cee36a8591282bcb9d842d0f32f1f04c"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 20987904,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 4096
},
{
"name": "model.layers.0.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 8192
},
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12591104
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20979712
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20983808
}
],
"md5sum": "2f7e6171ac0351f78163af8b1a6e4762"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ed36c50971738bdd08ae2ab1150820fd"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "47e9ad785e2c976074789c8b5d83ab5a"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "3063f6be2136e6ed98c1d73b1e0ccaab"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1264f1a44eb9ab68745d01ea53da5e0b"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "a19047b7973f1066042d0bedaac882b8"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "4e8e76bfd35611401d5396504f54c276"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ae0e5310659ab9716e61d03ec2114693"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "f6a2b966db0ff70ba3125c96a5e6f8e0"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "e8004ca46980e8515eab568c169618c4"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b06dbff200487e8963abfe2a18faa10c"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "bd87851fed4a03435b0a28d9f42447c9"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "27d40a71fc5d26371d8baa46c8d72b31"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "83337b46f585fa676dcd5a8761ad3df0"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "5082712a00fe7140ef9c9215846b3d60"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "7f3978ef783e8e42895a2486316f9304"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "622b5ce5905b898048f2eaf79caedfbd"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "ace31b45f71cf2c0bf9b1056d74e1fe1"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "2f7afc97c52867936a854ad44fe0c191"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "494e89ca81bf466c8ce17b5ee5f85cdf"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "13c70878199744779a54933d728727f8"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "f6e5b110dc063026eb64f03b3fa20bee"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "910d39d3f068b37b2df084f9762571b4"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "49f61ec31d545e7569fb2a44a2b7b07f"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "1eb0bdc450a0fca3053b47dc24368565"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f5b3c59b6ab43fe2f4892ad53d246905"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "38d72936eaf8dc4bacf812d8fdb2d551"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "773e500fd6e2ad9b4b9306b1e94af630"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "90826d2d305b088531c014dd0ce66ec6"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "4a44e23743fed303acca7cd7d910a8ae"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "06a369e56d5da237caaa2b9cd972be08"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8ba19ff095dc05dfcea429ff62adc5d7"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "3fbdebbc72ec4e5529e3e90a96d9ddec"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "e0c825ac82f14adb91b255cec234cd72"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fef5beab293cc482e63947c7aa133778"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "f219b3aeb641094c9506d63d34f61594"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "afe26b0283ed633feaa3784374d866b3"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4f0cd14ab865c2f8553f79466e85a6ab"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "e47e8214362118313253663b2c3ce43e"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "8bdcc2a37898c5b61dc2e9d30f7ffcc2"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "32e3d2650ce2fcb5ba7e8ebbcf7f8b2c"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "fe1013105c419278c23b765e981940fb"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 20979712,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20971520
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20975616
}
],
"md5sum": "9a3e24a308a77520ec39767e540973be"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 20975616,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.weight",
"shape": [
3072,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12582912
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 20971520
}
],
"md5sum": "4d899f886c5eb1df29d31ae4897fae61"
}
]
}