{ "metadata": { "ParamSize": 443, "ParamBytes": 18167250944.0, "BitsPerParam": 4.500119595955907 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1048576000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 256000, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576000, "byteOffset": 0 } ], "md5sum": "c5d3083a8d2422daf2610c41497cec40" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 131072000, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 256000, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 131072000, "byteOffset": 0 } ], "md5sum": "7803fac45026b4eb384f417c0280667d" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.0.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0afc7229eae1c639faf96b6260671d94" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1b91984460baf05b58779a4c13305791" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f2c07997dfce37433288314e92a3f67e" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3c1478590723af580bea955d3f87caa1" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.0.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.0.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "42c89d53adbb12e7cae97c2ca6351a86" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.0.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "bfd2e487577efae3ee789fe63f196604" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "53f803d643cf981f6c351b99c2f05701" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 25182208, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.0.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25165824 } ], "md5sum": "fed82e0fa7c3b451ac98c6fd77a50bcc" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.1.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e4901c356dd921daaaaabeb4e93ed045" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.1.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8ba676e3cdbdc35fb3458a22934f2b54" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "5bbd566f342f53fd8d8dc1f8c73c630f" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e5ee649a9f770cfa219cf6feadb25d1d" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "079dd6ebb390f9d465f2223f6fe1d6da" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "adb32162fa406916fb0c014a6221067d" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.1.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.1.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "03a2807d916a555288256c9abe0cac88" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.2.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "afe5bc10f587902b1c0ceaf606101bb3" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.2.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "944f559b66186092f678817827f2e2f0" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "84f09462e4dc4f6900d6b2a500544196" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c76fcc04383e93295c80717faffb6050" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c1e7030e13b7630f71838a60849b7b95" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.3.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7b1950ca371f6ad968f75f410a8d9e1d" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.2.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.2.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 } ], "md5sum": "f9a63d3eef7c3c737abdbd0f76db0219" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.3.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "686d8883378b018a30e369c985db3335" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "90633cf233a2913b73fcbe1e30c7e457" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f2e9330bd5bb74b6ccb03170ccc241f1" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 30408704, "records": [ { "name": "model.layers.3.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 25165824 } ], "md5sum": "725cbe3e3ffdbdcaaf19ab5fa56731d7" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3da1933d4a1e62263e44f96c04cfb3d2" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.10.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "edf897db0117039e83845b1c45698c64" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.10.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4194510b9f1ce6b5089bec74593fb195" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.3.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 4210688 }, { "name": "model.layers.10.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16793600 } ], "md5sum": "f48d4e86bddc15bb0e9a3b2b3a4591bc" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0a269c9285d58e94a92c01de8a0a7f4c" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.11.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "38d90096f46c2d30cb78ea7ba371a91b" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 25182208, "records": [ { "name": "model.layers.10.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12582912 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12599296 } ], "md5sum": "75db9789bf33dd881bc650534ae6e901" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.11.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "59947d599054123161fd0ea1ebbe4801" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3406c5420dc22c98b96516fbbf92d0b0" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ca886e8e77d7b3e0da0818c48c83ab83" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 30408704, "records": [ { "name": "model.layers.11.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 25165824 } ], "md5sum": "0c4bc3848e8a9967d79def11eb09efd2" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b668d5aac90dcef2dc5a7a41e241ea6a" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.12.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1f3d89755746afdccb5154f349b8ba1c" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.12.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "44cdcb58cdf5bcbbbc51a39362e9ce75" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.11.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 4210688 }, { "name": "model.layers.12.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16793600 } ], "md5sum": "da697ad66fefd5df7f471044783f7c46" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "fd7c16477b79550e26b4cec4ce235d5c" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c9d30538fb9ec34f56774cf9adeeaf3d" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.13.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f169df3a7bb7bb647c2d325d94f4607c" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.12.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.12.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 } ], "md5sum": "80e9f45401330eeb99ce84726a4d8498" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d689223b8f75218b32b9b7f9a8969c1c" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "037cbced0d23236d0d9ec3bf674e76cd" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d6098063522b220cfd18c6b1d12535ac" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f406b7ebc6f7695e02c373f3a68af4b7" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "917e4c38575ec3db8da3e0c357d897c6" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 31473664, "records": [ { "name": "model.layers.13.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.13.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 22020096 }, { "name": "model.layers.10.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 27262976 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31457280 } ], "md5sum": "91f74fb6625af2bff9e50c2897e7d625" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.7.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ba69d653615b49c77dfd3cfeee076db9" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.7.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ba8533f1fca7c25dfef335fd69d475e2" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "9da80bb55a6a4cbc8e1df3b54ce6f7f5" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3b2c120b449054347cac15895aa84f29" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "405f3edc7a45d9c744862da373043043" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "015e81dda9a55129afc029751083c070" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.7.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.7.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "9a534adb4949e7a2c050ee0d856b1c8f" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.8.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d51576deff9cf5bf75287e149bfb58b6" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.8.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2d6ed7e5889220dde7b91c404c2d81cd" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "2ed4c1b2b1a441971b058cd92f1e9268" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5ad96899a5ba1307226ed6b142d1b950" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1ffa378d0d4fbf4a5008c8ea0b6cfb8b" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "abe9dae34a4e82ca780e99d3caa4dca9" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.8.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.8.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "18f917be7d3f8de0d35abeaf86dffa4d" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.9.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "548dad444ee926b5fc0136e18fdd0a7e" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.9.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "24e98f9c20bdcc642d89c7bbbfb03b3d" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "652b846ec021a280de26e927073de0fd" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f749f74fedf9a0f67bad4ae8ba29c7d5" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c8ec70a873be313309ac711a95563115" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "68281413defca344d3cf843f513edafb" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.9.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.9.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "0ad094bea81b706b64c3be254291f8f2" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.13.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c7a1dd448ed473d7b99969ea577b2924" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8d3d7f403e4c860f9ed4880653581669" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 25182208, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25165824 } ], "md5sum": "fa43d51bffbea4113a199e44b2a459c9" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.14.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f08490dfe9dc90d32d434416b536ccd6" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.14.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "81cb7466454d52fcf006af172f19f450" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "76da0419d600b090617b43b7a2e3dbac" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "94a248d8a4f6ef8f84ccc450847924c7" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ff0db7ff6165542413f02e4cd2c1985f" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c93767584728faabfff9ecb2b58b9a0e" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.14.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.14.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "478bb4e7a1bf868a5885152a04369330" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.15.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "65c906cc2701e6f2d2a84304ce773744" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.15.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6ec4687ee858e51f77d1c4fe4f204473" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "e3dab2848244f765e36d9196a78ebd94" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0ec87181e257c60eb1a2ebbb7021308e" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "19fb8028f448d429be7606a231d850d1" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.16.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7f5444a1116a8c987a889d09883dba66" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.15.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.15.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 } ], "md5sum": "11fcdec78b92b76958e71655980c98fd" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.16.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1cbffa7320e0a769fae1e4dbb33e6400" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1127a41b2436a11b54e120422130069a" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2742c6ae91c91130bcaf0111960058a1" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 30408704, "records": [ { "name": "model.layers.16.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 25165824 } ], "md5sum": "4c78f8190bdc62f552d8984ce90fb903" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fb46ae4159883080a34d6b441f33f917" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "aae771fb01ecd01f5d37d1d159aff989" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.17.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7869cd97245e9cb5bfb7fedeb1ca0243" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.16.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 4210688 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16793600 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16809984 } ], "md5sum": "2c00ec279c700fd6f51b0c021ce59726" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.17.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1c5ef2f9c2743f442b28cb838bf86cff" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "37cab0c86b9a9c9eb516befab94b5ec3" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "15dea361ba7c95cc842a6c6022b9f33e" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 30408704, "records": [ { "name": "model.layers.17.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 25165824 } ], "md5sum": "2e0a031c58b69af93f8270c8567d6417" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "83e7e3facf1dbf309df1fe3d5b087470" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.18.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3b07297464f268dd52ed12231d2eedbd" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.18.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "00a10d74afcd1e3f2b2429c4be0cc573" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.17.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 4210688 }, { "name": "model.layers.18.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16793600 } ], "md5sum": "6631e2352efc1e043f0eef1193dbf91a" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "993ad5d624ba876219cf66cd2bd83387" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3450f3cb9a3b3838b4c8ec5c711469e3" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4dc67f53973ed09eb2376f511cbf311c" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.18.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.18.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "2996772cb075a3dbcc7b20ef79fc8fc7" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.19.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "dd9f5c700653da2587cd2deaa1965e82" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.19.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "14642b02b288186c66d6bfaa739ac2c3" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "2fb93bda10a9399a95115b98aad0cf1c" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "367be01a41c50f2d3b7a1509d905e2cf" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b772e6bf541aeb748e3945239bc92a14" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "be01e96c3a6e1e377d1f6874684f7ea6" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.19.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.19.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "495bdf1bf40d8b14b36266169b2d2a48" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.20.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "655cb4f25810c8f90181f5c7bce02208" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.20.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d2206c47cdfa657d10794b69df10b9c5" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "4ae5027e39bb6bd3846daaad68045f86" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4cdfed76ef491702013ab7648330cdaa" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a0f6359cdc39d60a6aedb4a49b3c79fe" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "11a3b97435d1c5d7800e68393ef52121" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.20.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.20.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "17257d7b5af3db95e963f687b4a5c6da" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.21.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7671014de5b454551a343f85f0109ac3" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.21.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b5a86d1864e732f004a34d80051ba768" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "2d7a1c28dab3b70b797674da26155aa4" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "90766be437f1368f8b276f52495dd0f1" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "199c94fd208aa04aaa9068a1029abbb7" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f2aca788f52705fd85967475c2d21577" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.21.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.21.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "13030cefb4e3438db73dfe950be2c1e3" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.22.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ed664daea3633ef3e9308c18be257cbf" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.22.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "80478eb1621720dbc7c2fbe1ba4cfda1" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "407fa906d3d59ad97e67be1d1214d47d" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "57d43cc6b138c2a87ffcb5c5d63c2fde" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2efbdf41a1b27b74298ea63bb4de78bc" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9ff16b92fcc55496bb68a685aacf06d6" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "081ed74f16224b00a516969822bd3333" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e4fddb5dfdf50023bca7e1c5a83cd3a9" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 31473664, "records": [ { "name": "model.layers.22.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.22.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 22020096 }, { "name": "model.layers.23.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 27262976 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31457280 } ], "md5sum": "253614d5205de802a378c2b94bb42c57" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.23.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "bb867b647c2776c7d4c41af83e46d532" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.23.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8cbb37a698fab5227021e126696c3170" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "0bc0a49c78d18553f5564de70f9e7bf9" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "27e76439a97e8818e4c65f94ae5e3f39" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.24.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3df912b130150912f7bfb04e9581fd5d" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 25182208, "records": [ { "name": "model.layers.23.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12582912 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12599296 } ], "md5sum": "d50f7eed9eeb03756832db326d0ea418" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.24.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "febebbeed03e4ee55b884547f182dab1" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4aea462cc5cc9d6752889cf51a54f5a3" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8363cbc4da7eb114810f018fa61cefd4" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 30408704, "records": [ { "name": "model.layers.24.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 25165824 } ], "md5sum": "604b3b48077ea664ca891f25a8772a1c" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "54adc99b63645320eca047a0d53002fc" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.25.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ca67b637983f73b6340a12a6ccb3b45c" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.25.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "883be0d729fc369e3f8c07f52b15cf4c" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.24.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 4210688 }, { "name": "model.layers.25.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16793600 } ], "md5sum": "e6f5701d0e23e244c65a4ad8a1c2abc7" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "78cd3b237390de448a7d9517eb18ca04" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1b277acdc6d03d212bb3525db57ec101" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.26.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a98938f7e5b46f699da8a8232241bcfd" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.25.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.25.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 } ], "md5sum": "2739734e3cd89b2ef79360e9651837f2" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7d9c021ea3e9e5fb13ee034ffc116943" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1372ec4418bdc3e96b8feff440f66c56" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2796a759be023fca18c4e49b2a3b4ba0" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.26.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.26.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "3e6218b6c95f476ec4cbc8d7eb030104" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.26.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "95fd8aef850cdba19793ff8ff433781e" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "012c331c996b173e946f0bb5ea397ae2" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 25182208, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25165824 } ], "md5sum": "b3885da6335e2090600faef6f7216c64" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.27.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "efc4cd6c926916d01d90697347734b2d" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.27.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d21b913c175a28da5b67c9725ef16a6f" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "920158616356d9fb750713ed0e8ca0e4" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9ca35b4e3a676c595945f6b7e9fc4810" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "adbee9e546334ff25f1cc376fce2d95a" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9ebedb7dd3e1c6b6a81ef5a52bfbe2c2" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.27.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.27.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "af12949f9a754ddcfd9c2fbd0cbae023" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.28.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "71f3fff4cd5d55f0139c37032e9a7bb4" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.28.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0ee5c5d9d64f80751367c7cd40d0f69d" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.28.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "6376598b3f7d25978390e336d9e2da7d" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b3f54f027d412bc0082868438cd57ed3" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5411a9d9bcccd8ec61298a9e1a3a4946" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.29.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5877862e949bc1e260c9aa932b1ca47d" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.28.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.28.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 } ], "md5sum": "18e9b23348e69f1052442ad57f165a4d" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.29.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "36ceee473da91077bdd4c9ae0c6b3e8e" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8fd0c54270c792fe5560bf3f7efc0198" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b70c33126a3af7c7b972fff3254b8d3f" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 30408704, "records": [ { "name": "model.layers.29.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.29.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 25165824 } ], "md5sum": "bea5b7a41b57226ba5b8590aec474e3b" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4c0dd84b7531b769a9e7d0f27229a694" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fe31a3ff8ec2470c39e56611731d37d6" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.30.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "de1e1ac0224210e08d27d9ebea80c9b2" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.29.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 4210688 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16793600 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16809984 } ], "md5sum": "d92fa4ed737b5fbb947da65531cb6e0b" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.30.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b3e074b27aeb78002c61f652ccfd0b3c" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7d17c52fb692339750b24b76d2a8ba95" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "91cdf175a3941e4a5eb4394c04d35cd4" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 30408704, "records": [ { "name": "model.layers.30.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.30.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 25165824 } ], "md5sum": "cd65c9ec052b2318fea7736e1567f013" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8948ede8e2d3b135c7e03356a4029c3e" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.31.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "75cf21a649b1e5d5f09c2e828d2bb019" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.31.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "23a53fb92652425f539b2ab7ff532bc2" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.30.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 4210688 }, { "name": "model.layers.31.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16793600 } ], "md5sum": "4c26d7045b8339b0519930cdb0bbffd6" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "09c04e2ee2608e8e7563dc653f8c9085" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5178d2eb7e8d28c95f4ac30e1538fbee" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "78e488566db95738296574ddd47e58d5" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.31.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.31.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "78e1e1e5974a206e5c66b94b01998cd6" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.32.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "911e19d18e62f55a6377d85cdf79ef90" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.32.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5175e14d1babdb641505d3d225213028" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.32.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "9ba4a16ff6ddf0ac05b5ac84a46e72a8" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e261f0f837929c1353eea9bb62dca829" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0a6690417d9c40185d4aa3a6079e0acb" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0eb24a0d0f54ba6c56319a9ca758406e" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.32.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.32.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "1e070d79aae81a1fee5471080d03706b" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3be032f6fde4b0e2c9f06a00a61d9752" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.4.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fc6241a3cb59754735cc2d80b492934d" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 25182208, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12582912 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12599296 } ], "md5sum": "cf8167ca3822a2425c8465945af0124f" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.4.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "79cd624f93cd67ee7dbe9c81a86f4ee0" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "01d03f90201c018cb9c7684240e0ab3b" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "42a88c5e991f56bce906684ec52b8ad0" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 30408704, "records": [ { "name": "model.layers.4.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 25165824 } ], "md5sum": "ce528af5c7a5b5b5d69907e120110876" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f1d4a53f2a843cef2f0e7e02b2d242e2" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.5.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0991b1a4a33776ee2502664f69834c38" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.5.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e2d67bbebf8a4c55139918775c7513ec" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.4.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 4210688 }, { "name": "model.layers.5.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16793600 } ], "md5sum": "714f0f1950bcc69858b4907c83826396" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e7b47ab702cf311fd65950e0b6e68e4d" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5170efffb9e4c7d0a8d75395894132d3" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "06483c2b255842e78204368a479eccb1" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.5.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.5.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "6ef7f176a06e94352ed0baf0bff1984e" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.6.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "aa8a768fc3d7c7c1b692d813bb394193" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.6.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e68a6b600dd18121bc0e0446fd48badc" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "648c4b4062a80a75cf6408023b6e8901" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4eeab07ac289ae1220341569749ba8bb" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "43cd0011a165b7f4fbb269b919934c27" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fe79c2bf46b6bfe9f0c15d428645beed" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.6.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.6.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "44fc71eda2ae0a0e3c7445d8f903b21b" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.33.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5d6d8433304184cc5d97fdb2bffc6782" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.33.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "49e67bad8e07a96d58c588321eac19de" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.33.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "a2405a01971b42ad2e2908e6d316c6e3" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "78effae2f932c438d62ad2c573a34578" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e77e05ff758051bbce50fce302e71cd5" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "36a79fd9dac2c22e97eeea2ab5813994" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.33.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.33.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "2d801bcab451a0742c7b6f9bf65e9673" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.34.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d479c98ba0ed88bb8f8bfbcd9282b6bc" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.34.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "777c53d37f58448e61ee7dfd2ca8124b" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.34.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "4e4d4f4c232e4445bc9f89d7bc95012b" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "deca0639b0ccef6bb27fc0e512489b9e" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ee44f2c06fdcf9e12cb4e5370c0c788b" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2a35d2d0db32d2e8afe6bb1305440f51" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.34.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.34.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "1380ba41e27b95e4e1412704e004c6df" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.35.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5c416e5c89ec8817abd2ab846720d9c0" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.35.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f88c1a5a019f1666b8ce54f05d0a1f2d" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.35.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "62591bfb3a04cad63af54edab2567674" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "76c78b06f7c8b9cd6ddbef59abbe6b6d" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c198d239a04aebb43686abca3d18f85b" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a569ea42fab92c3da4b1d6856293ac24" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "64d3e0da35a237dd06d1ab7923a5e30a" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0ed7118aaaa0aa982ac26216ec129108" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 31473664, "records": [ { "name": "model.layers.35.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.35.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 22020096 }, { "name": "model.layers.36.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 27262976 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31457280 } ], "md5sum": "1ec2260ca83366c0d82b205ddb074816" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.36.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fdad05fa73c5aa520564b4d1ff014020" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.36.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0af6a81bd74a8640fca3d877b41fb836" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.36.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 } ], "md5sum": "c59a0f911667811a32eb8f79eb2edf18" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "987c2303266ef4bf72bce6e6fddb686d" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.37.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d361fa9fd4f1d532cb97fbb05dcdaf41" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 25182208, "records": [ { "name": "model.layers.36.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12582912 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12599296 } ], "md5sum": "340e73f5b4c4942fe2539a9fc79f59b1" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.37.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "def9e645888cdf9276216a82e15786ef" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "26c03b8779ac0c2c83f163dea7662270" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3a53777ec83764d049d500d065dfcb0f" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 30408704, "records": [ { "name": "model.layers.37.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.37.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 25165824 } ], "md5sum": "a9525c5314c0cc813a1a56ffe761cba7" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fadcb5d67060b71bb3e4205a0fcbdbef" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.38.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a72a090402be0649130c7a217297da02" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.38.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3ddbfdf5ddc4fc19c283bc1c9bb7efe3" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 29376512, "records": [ { "name": "model.layers.37.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 4210688 }, { "name": "model.layers.38.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 16793600 } ], "md5sum": "f44f4b29331fe6735429ef373f4ad943" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "303b5db25f694fa910bc3c8509f6f515" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c4e795e1e0ee7fa92192db669292920e" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.39.mlp.gate_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5ce24bc64a32d89be5a66db99299d1ca" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 22020096, "records": [ { "name": "model.layers.38.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.38.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 } ], "md5sum": "7ccebedeebcbc25f099478a886939fc5" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9bfec26212f6153695a7bf94b7ed8901" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.out_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "43c4a501ae5c0677f3777f18969abf1d" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "368521515108205d26c5353b9cfd6b5b" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.39.mlp.gate_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 12582912 }, { "name": "model.layers.39.self_attn.out_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 17825792 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22020096 } ], "md5sum": "472cefa815c9075f98e8eddcbbaef27d" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.39.mlp.up_proj.q_weight", "shape": [ 24576, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e09c7ddf26c54324ae72d9484eb8b736" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 25182208, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.39.mlp.up_proj.q_scale", "shape": [ 24576, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12582912 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25165824 } ], "md5sum": "5f0b7ad0570878b150bf0d46ac6a1a03" } ] }