{ "metadata": { "ParamSize": 45, "ParamBytes": 789200896.0, "BitsPerParam": 5.000788364646225 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 113246208, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 55296, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 113246208, "byteOffset": 0 } ], "md5sum": "fe870c24c37d50833f35e5356e23fa21" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2732fee74ddceeec2a570c77bcf3a75f" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5583cabee6e1df6bc137f2495e41aacb" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "cea6f0488cca4a33509e06aeb87e9658" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 32374784, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 55296, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 17301504 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 25690112 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26738688 } ], "md5sum": "b6cdce6d80073579dc41fd1cea4b3f2b" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31145984, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 2818048 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 2826240 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 2834432 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 28000256 } ], "md5sum": "24947fc54d4d479312e736080fe47c06" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f81606af90319436b0c52bc3087a8a75" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2f3b104d6012ee96282f1cb68e9a540a" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "92644b7b40d42f6a8f79a2780b4287e0" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "cbc67bc2b9741282cdba7dc02cf94d88" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 30490624, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 15073280 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 17891328 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 17899520 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 17907712 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21053440 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 29442048 } ], "md5sum": "99fa11d45465d2a56e742b32f1f9a1d8" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e012050a37a0fcec4e06efd599d37d51" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 31014912, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 0 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 5636096 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 28180480 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 30998528 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 31006720 } ], "md5sum": "4e0c7558b16811665b346390a73a8911" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b969fb28468abdae8a625930d384bdec" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "93ffb5c0be2625c19d8f1411fbb2e187" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 113246208, "records": [ { "name": "lm_head.q_weight", "shape": [ 55296, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 113246208, "byteOffset": 0 } ], "md5sum": "ad297ed29c5c64ff691e30d12bfaba09" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 21061632, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 12582912 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 18219008 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21037056 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21053440 } ], "md5sum": "3865398255ece47a36de555fa54b952e" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 14155776, "records": [ { "name": "lm_head.q_scale", "shape": [ 55296, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14155776, "byteOffset": 0 } ], "md5sum": "2b16bda1d0ada369b8f31ed936e081f6" } ] }