{ "metadata": { "ParamSize": 99, "ParamBytes": 1027181568.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 98304000, "records": [ { "name": "lm_head.weight", "shape": [ 32000, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 98304000, "byteOffset": 0 } ], "md5sum": "4cacaade4fa9480fcb128b01802010bc" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 98304000, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 32000, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 98304000, "byteOffset": 0 } ], "md5sum": "128c3e824ae1c77e76b7bebec282b68c" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 8192, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "73308bcc241ddfe9a276e1365343a9a5" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 26747904, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 3072 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 12585984 }, { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 3072, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 12589056 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22026240 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 26744832 } ], "md5sum": "61ffd05d3b598a4bf392b2883678c6c8" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 8192, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3e08d4f83d0234b8f04ee5c08686ec81" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 26744832, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 12582912 }, { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 3072, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 12585984 }, { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22023168 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 26741760 } ], "md5sum": "ac190343ae5018ae4ff4b3c72668e64c" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 8192, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d150ae0a002e81aa75d3a9869255318f" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 26744832, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 12582912 }, { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 3072, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 12585984 }, { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22023168 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 26741760 } ], "md5sum": "729fdb8f0eda6fb8be72c549a265b99d" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 8192, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f113f6c464d7323085f32cefad26d060" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 26744832, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 12582912 }, { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 3072, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 12585984 }, { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22023168 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 26741760 } ], "md5sum": "a94367f453649a530637805bb686bed1" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 8192, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4c7e4e11421a7e78191585cf85e6f492" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 26744832, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 12582912 }, { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 3072, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 12585984 }, { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22023168 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 26741760 } ], "md5sum": "f7aff300446f28f284dfba1c059f1df6" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 8192, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a1c8805615170e587880431364849a04" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 26744832, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 12582912 }, { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 3072, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 12585984 }, { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22023168 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 26741760 } ], "md5sum": "a2759da9f1b7863921b4196085a09084" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 8192, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2da23d083df683c1dcae490831b95460" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 26744832, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 12582912 }, { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 3072, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 12585984 }, { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22023168 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 26741760 } ], "md5sum": "0d74164cfbfa518ba337ccbfd5100d28" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 8192, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "33d9a7a41dd0f541307ee90b4ad70328" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 26744832, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 12582912 }, { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 3072, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 12585984 }, { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22023168 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 26741760 } ], "md5sum": "2f7d100b62508b23d809954b86c1ee2e" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 8192, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "30c70e6edd714d2c85f9426157e8c80f" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 26744832, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 12582912 }, { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 3072, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 12585984 }, { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22023168 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 26741760 } ], "md5sum": "3fb2640e234b84defda32dfc1a42104e" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 8192, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "53150dde82ef99fd9e1d6348d946f2fe" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 26744832, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 12582912 }, { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 3072, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 12585984 }, { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22023168 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 26741760 } ], "md5sum": "8d367d8f6d1b43767339847d87c0540d" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 8192, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cfaf8058ee5a8a20f44eb27a0b7b573f" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 26744832, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 12582912 }, { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 3072, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 12585984 }, { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22023168 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 26741760 } ], "md5sum": "a165373f294ca4281380a26274ac9664" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 8192, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "31865399476b425d4ccea3d19c707990" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 26744832, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 12582912 }, { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 3072, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 12585984 }, { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22023168 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 26741760 } ], "md5sum": "7cff33dad5234670929d6ce9d8f823ea" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 8192, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "567b76989079f6b8afb9883c30f21dd6" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 26744832, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 12582912 }, { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 3072, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 12585984 }, { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22023168 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 26741760 } ], "md5sum": "3bd295d16385959c396de51b03ce56e1" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 8192, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bee870077fa362451ffe490753151967" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 26744832, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 12582912 }, { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 3072, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 12585984 }, { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22023168 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 26741760 } ], "md5sum": "9ef1e19c27ba9772c11e5876693706c7" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 8192, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "01d39adc4287100619cd94e0c84e4c61" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 26744832, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 12582912 }, { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 3072, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 12585984 }, { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22023168 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 26741760 } ], "md5sum": "3806098b62919ec3c4deb773c46d177d" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 8192, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6613b12dd755ddbe02dfd8e7640d1b6b" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 26744832, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 1536, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 12582912 }, { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 3072, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 12585984 }, { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22023168 }, { "name": "model.norm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 26741760 } ], "md5sum": "70dda49a243e5c0a7bf48b85b8a426a1" } ] }