{ "metadata": { "ParamSize": 325, "ParamBytes": 5019811840.0, "BitsPerParam": 5.000895173865207 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 262668288, "records": [ { "name": "lm_head.q_weight", "shape": [ 128256, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262668288, "byteOffset": 0 } ], "md5sum": "fb2d02f651e4bfdc6fe8b95a71d24c94" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4e59a855acde76d13879fa6cb99611b3" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 32841728, "records": [ { "name": "lm_head.q_scale", "shape": [ 128256, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32833536, "byteOffset": 0 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32833536 } ], "md5sum": "352ee11eb2f549968b4825478dc7775d" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 262668288, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262668288, "byteOffset": 0 } ], "md5sum": "3ab443cff6112201ae53cd3baefedcdd" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 32833536, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 32833536, "byteOffset": 0 } ], "md5sum": "7b04b4997972b5c339e8adc5617a0522" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33054720, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 3686400 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3694592 } ], "md5sum": "e4017a9a1b2bf2338a444477c8608060" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "186998dc1ee8f17d7164d935c861c0a3" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "6d7afb284a49a88c2ddacaf726b5a42b" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fec640ff69773da57e6be577c1f52650" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "76bbbb747e31f00c2d1e09edef4420ef" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "71b53d9a3979def985effa85b116093f" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "af202fb8683b4c5c2b457734f94281c7" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b953e11666f0b454a22a74c493033c1b" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "03df88d71a895d9ddd34eed0da219f30" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2af83a5654be7fd8d9221935265cea42" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "1ac61362dae430431e128d9769db1a85" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "d402236028d81430e8e8d43f87849429" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f44d9109d741857cd316dd4785bb4612" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "1bd5b407d2ec6759735bbcd02b6d299d" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "79292d5fdbbad863ed359b4199158c0c" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "3b558fc17be27dbb7fd1de2fb5eb62b5" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e619002bdb11fd6372d556f36a950b90" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "894ab1aedaf0eb242e8e678093c7ec92" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "773c4348776129836e0e6a556874e2b9" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "31cb75422d25f2028db55edb887371dd" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2598535b7f0ccf23292fc954068239a4" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "cefd4137a2c8f093b87c9e4321878ddb" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "320e04cdb4de1fe23dfb235b15319903" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "e92c589e7dad4d6e674c21d8699a3576" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "97e1814f6e436dd3ab68bb671d68e20b" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d39351a0c5c81b51f602ae9c5d2e46a1" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "3e1c9edcff72dbb0acbaece3ecf14617" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "e586743efab7855e986fbf108160f6dc" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "db5834976d49b828843ea07ba99a2d43" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "722cf6487b0434876be49161c1463f7e" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "19e501c63179b44b9406bb741a053241" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "5d0bd6506b731ab4e8cbc594906b15dd" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7d6e32b84cc8fe7028039170ed5be671" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "59e325424bd9ba72981810e45fa02348" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "6b2c2f65379232f93c6cf7d9d41d011a" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bc8a1605b258153e879a1b12b87ece2f" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "8236e24025b645578b6b076b0d9819e9" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "f84b5df1bd8e378c4110fa1574f0972f" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cab379990a69c21d4065b658ba166fd7" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "e7a8db931bdaed95bc6b144ed5442c30" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "156d0fd69d36c1c7b79c647606f5c639" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "944d55d138f0547c679357adab37f9a1" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a967f484d8d1e9e99dd3cb78598f83b7" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "211f9e28e82f06c7e7c7a0ed6c2527d0" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "0d0231dfa4a208677e13fa6a0ee4e072" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "10fcc57eef83f4105e41e0c745a1953d" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "568496f1cb730330b39583a55946d7f5" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "b9a801f218ecd9e53cd6b7e3b2150d82" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cfcc1d8106d570e9bd868881a7f8f214" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "85c2cc8ad04efbc991672977297ca162" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "6cd4bfe385d9e627ed31274b7b8159bd" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "eb55d7241b7ed0cc60eb3cbae0467e31" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "360a148cb25de23e1cd650e9bd7fc96c" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4e76b3afafbb43d6b5b883fa0c67e6ec" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "ba942ce5d0b387a61bc41417c1b52bcf" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9a9bc481aaa9d2ab63142e26b160d320" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "ee0ce253a45150b12b13f527686ef113" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "604a8976e9f95c26f5dc727ed4a24c3c" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1ea0950837b5eaf691af29959d3a93bb" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "6a1447b1d8900ee952e706ec2fc09aa6" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "64c7b3516154cc881b370204553e0436" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2b68f6d87d61157547eaa366b30045b2" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 30932992, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 23592960 } ], "md5sum": "3a69370a9895e46b6eeaac80f23bbfa5" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d7d8be646cd5f107e56eb85c86e9fc41" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "361e74e85facdc99ff3308a142486500" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "2bd59033bb19557156ae23e3ec905dd5" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f47c4097dba5ef15cb73de9baf979901" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "78f1c51e11fa938d3c6d7f1340ab434f" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 3670016 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 3678208 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 3686400 } ], "md5sum": "9d148ed4d267527f7e9754d5c5e7086d" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "aa27bbc7a2e0d25c0a1c8e061b84d689" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "0ba9abd476cb05ada38c9fc353171ca9" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bc60f0ff7768f86ff01de5cfd40bb8a7" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "29ec63ab6007c867d297e410b485253b" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "e784b737a52331f00abe5bfb7766adec" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f31b33dc98988dbb0e71a75c11f70b1c" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c282dffcee185ed11f6fde80e0684d7a" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "fb6d5d91ecf707f941b8349cdee2e10a" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1f9b6d5a416e6f00db89c6abee2e4857" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "3b8caaad522b7161f41baa1649863976" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "109bd0d5bd711eea54b12df515ffc154" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7cdb65ba54150ace6e70c03091f97d00" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "6e28ee09f75450add55204696a4071eb" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "53beb490b503b77ff7a2f2eb7cf27687" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "1928bf0448f98c09c727cc2768684733" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "10fe3bbb757c28dd684b76cc7a2b3768" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "cf26f7fce12a20c528407e9402c624c0" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "7b4925d8a0bee62bd2fc3a02f6d9d2ba" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b223afe9d640da2a1301a104ef1b3cb1" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9106c7af94fb22222122ab1da1dd4211" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "56896eeaa62208d09edd6438abc0732a" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "093c55d9a160a3726054126760c9e442" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "dedc950d0df6bf6751199b97fe215927" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "5a1d7ad86e1c6677d74018e62c6f4c72" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a9929f87fecfcd5f516d5228611f9b5c" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "233fa1c6a739cbd142fca4f4832c8115" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d7d814f458bc550632557acc63113e9c" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "b78d1b861062dd0f81a24a31cdb019d7" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "44101bb237ae80bc1ffd643948aefd6a" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "073264a3ef3c9bf9882568787198b376" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "fd37e14d8427cb443a4b95c715da27c8" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c7b3f0fab113205c409b4568dee89c68" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 32505856, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7340032, "byteOffset": 11010048 }, { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 18350080 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1572864, "byteOffset": 30932992 } ], "md5sum": "12f20f4387ca5876afc7d1e4497890b5" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 9437184, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 } ], "md5sum": "c22b402fad0136ad1be93dece6c93e59" } ] }