{ "metadata": { "ParamSize": 325, "ParamBytes": 4073866240.0, "BitsPerParam": 4.500422792921966 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65540096, "records": [ { "name": "lm_head.q_weight", "shape": [ 32002, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 65540096, "byteOffset": 0 } ], "md5sum": "084dd39965558847bc2b553ecb52dd7f" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9bad18622ef0dad11218450714b314c5" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "0b7978b2a3e2be8765bbe291edac7143" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ee2844a6fc90d2a9f940d5ef13f5fdc3" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "69d2050df2358ce181f10a842303be9f" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 30245376, "records": [ { "name": "lm_head.q_scale", "shape": [ 32002, 128 ], "dtype": "float16", "format": "raw", "nbytes": 8192512, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 8192512 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 8200704 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 11870720 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 19210752 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 19218944 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 19227136 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 22897152 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 30237184 } ], "md5sum": "89075d1698dc40d3fda7bb3a587d51b2" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "201588e5b2c565d40449a7c098b4a1b8" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "204eedd6b4c4ba7642c909d79e685ed8" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "2070067c82ac9bad4bf34536e702bba5" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "45aa1fd86735693c2d174e85cd50aed1" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "e1fb7db4d374451dbb0138b381328ac8" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "77370f2c37aa18329c505175ad7b46ae" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "afc184b59ba36c8481e213b3f9fd2910" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1cc09e31f92ecf20f968c2e7d70f932d" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "224139a21c5bf44ac81bc4a67851a39b" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "083d78fd21c705c69d3fe85bbfbcee79" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4cf520a021c508e30322b692747a345f" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2d8862bee783e38af7dafbceb3b77a20" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "7cbd6ad570c65763e1e54f8558ca116c" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d1ec2d575fcdadeb3d85fd063539256c" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d9b8f323c6293457cc5c70a00719df2a" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "b6d2b8ebe3c28f8b34d696ab91c43a49" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "607b79a942f686be5cfa2d5979da215c" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "075439c04ed512fa1d63dda7277dea43" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d9965cbe99dba4abb29ce34068475803" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "5aa5deed27b85edfaed55f54e7ef7533" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1ca90bf0c503302a46a3b854b9c95b80" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2f690678da571b0882ee68625c12064b" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "6ac174072de3cba6fd7bacd1d1a71f32" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d1730cf2065a6db14e9f2f4d27335bdd" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "16bc52aeb517c4aec50ef1416ce47c4f" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "bc5c6fa4040b7a863c8939420fde6828" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 65540096, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32002, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 65540096, "byteOffset": 0 } ], "md5sum": "5bcaa4a013e20a4899f51f71e535fda5" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b2445f371545cb8b0a0e78ca9f370d0c" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 31801856, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32002, 128 ], "dtype": "float16", "format": "raw", "nbytes": 8192512, "byteOffset": 23601152 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 31793664 } ], "md5sum": "f903f22d9ee7d957b72e00516df1e3e6" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "033bcf372978b8b88f588547c3bd6b65" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "3ef25aacc31e6dcb88fccf359ff99911" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ba7e24877eaab32825681671706ef630" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b2c2162cc28e232e920cf14c99dbcdc6" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "d8e8b86829e05caf1e51906c8696c304" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "344fd1b2fda887fda7725dc80bb5a84d" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "17c5a5c21205dc487bd0ff4b016d8f76" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "89bd583715cba95d607d25ce5ebd2dc1" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d5eafc068b35517fe26dd43fb7450e13" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9369ee699bde3e1905595be7ccf1a81a" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "510431799c346d067e97c1ffdc4502b3" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "eaaf5637485ad670204386cac669c81c" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "d11b8ead0abce3288ee1f8a29cdd22a6" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2776fa670846cc7508a54a15919ad8bc" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "7503be52748bb97376b0081d8073be1f" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "94ea1d1622d9c2f05012d97a11599c05" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "3dadadeb027ae2815108c623e984c9a0" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "e02300ee3b531d4b3a844eaf0ad49a1d" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3eab54a9bfb54e4b393cb01bad1fb952" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "96eb9a9dcf3b9c3aa56c6f59ba9c0d9b" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "681e44955d232aa75a63ba10c91530e3" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9fea801dffeb44b7fe4a293ca704ea50" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b3e03917e555e2bad378634b88eb6c2d" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "553d4b1b218c90e779958ebd4134a258" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0f0cf77ed575679ac0982f460b15e1ea" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "90ff705a37f645c21cf1181f5c8053c5" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "04ccdc4c82c87486fd6e1268116d4ea0" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "60ad3f08c8574a2e54f03aa0adea3587" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "65cace1e20b36d04d53ce4ee34ba9334" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4e87c3c1236e0bff5169a379e5f380df" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "201c68021ccfdd160bff4440cedede61" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9c7c224913edd7a3108e30ae1b99eb88" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4c98e3fc16d0eb06a4eafe6c711e65ac" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "fff61255ef8b052d4e98f114b117d62f" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ce8ff7a94d32c91debe6da70cf0eab5c" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b155313f06b9f43956b58e6d17b76928" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "8651490cee450eac4e9f43f280ecdf03" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ff3165acea1bc6d7d2ed5f66244d2c7a" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "91197de298ca8703232da783e69e4098" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "bae2b1fa5cbc3f87e8d156d73d799898" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "7a67f16bee34684c09dee7094eb1adf5" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f3f35f1ad3d451fb7b551f1771fe857d" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d37fcb026c7df17b2983d032509bb379" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "58b87f1091a2e21471ac1ea1848d58ce" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4d1b17796243dc5833e92b296e570ab2" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "bdcbf574dc0f3463db17f5cdc3739ddd" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "e682349a3057a347fe0f70c5e270bc66" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 23592960, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 } ], "md5sum": "6e8f616125580a685313a6cdc02ecf25" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "08ea80d7b976fab28057314354af468b" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "bc9db5cbb25367300457e0136053afc7" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "41e326d9cebb3a25c18ff33bd42faab9" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ba6b2730383484d9bd6d8b41954e393d" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "47e69ff0d065c375725ae3008b894263" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "4e3c1cb63d0a1b6ef9d15bcd924484c8" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "ccc3cf69d64569408a8f587d9edb7351" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2e9d918078af5e4d2254fc2526433a19" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "0a4e09a2f834364392285045a634ae70" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "cd6edcdb752295b372941c94ca302aec" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c115171e0f7227a227c951fc56f8317c" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c62ba466a6d799e2f422e6d678c74791" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "3a48f4fcf6745dddba243ac4aa2a6916" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2a2e9c21bad4d154944ff05d8d853af3" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b4ac5fe36facd10695bcc721f0393c8b" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "f81d65232864b1b3effc951929b15e24" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8603c4753fffa56ef002b10611a8ecf2" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "7024e5d45953db498037827cd6ea50e1" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "fa1b7011c986bf4f2ff1a9c5dc0cbcdd" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "70bc60c66c7517c343bce729791870dc" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "raw", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5dd5af511b26432c8cdd6ef84650efde" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b36c6c9d18925fe6d6f6d22e08455b53" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "raw", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "raw", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "raw", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "14c9fc6115f2a95a75cfddcee76c9804" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 11010048, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "raw", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "raw", "nbytes": 1048576, "byteOffset": 9961472 } ], "md5sum": "559f55a34b2133eb0f67e7759ac4cedd" } ] }