{ "metadata": { "ParamSize": 98, "ParamBytes": 2471628800.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 128256, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "96ecea76fb745a56ee7bc9a00606f3da" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "45ab6a597571f55d11310ad502ecd11c" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "72068393e1767a06fd5d059251fefc72" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6fc65384fd5a6fb03c461e1022ea887f" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "cee36a8591282bcb9d842d0f32f1f04c" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 20987904, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 4096 }, { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 8192 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12591104 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20979712 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20983808 } ], "md5sum": "2f7e6171ac0351f78163af8b1a6e4762" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ed36c50971738bdd08ae2ab1150820fd" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "47e9ad785e2c976074789c8b5d83ab5a" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "3063f6be2136e6ed98c1d73b1e0ccaab" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1264f1a44eb9ab68745d01ea53da5e0b" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "a19047b7973f1066042d0bedaac882b8" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "4e8e76bfd35611401d5396504f54c276" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ae0e5310659ab9716e61d03ec2114693" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "f6a2b966db0ff70ba3125c96a5e6f8e0" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "e8004ca46980e8515eab568c169618c4" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b06dbff200487e8963abfe2a18faa10c" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "bd87851fed4a03435b0a28d9f42447c9" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "27d40a71fc5d26371d8baa46c8d72b31" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "83337b46f585fa676dcd5a8761ad3df0" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "5082712a00fe7140ef9c9215846b3d60" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "7f3978ef783e8e42895a2486316f9304" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "622b5ce5905b898048f2eaf79caedfbd" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "ace31b45f71cf2c0bf9b1056d74e1fe1" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "2f7afc97c52867936a854ad44fe0c191" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "494e89ca81bf466c8ce17b5ee5f85cdf" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "13c70878199744779a54933d728727f8" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "f6e5b110dc063026eb64f03b3fa20bee" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "910d39d3f068b37b2df084f9762571b4" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "49f61ec31d545e7569fb2a44a2b7b07f" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "1eb0bdc450a0fca3053b47dc24368565" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f5b3c59b6ab43fe2f4892ad53d246905" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "38d72936eaf8dc4bacf812d8fdb2d551" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "773e500fd6e2ad9b4b9306b1e94af630" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "90826d2d305b088531c014dd0ce66ec6" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "4a44e23743fed303acca7cd7d910a8ae" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "06a369e56d5da237caaa2b9cd972be08" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8ba19ff095dc05dfcea429ff62adc5d7" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "3fbdebbc72ec4e5529e3e90a96d9ddec" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "e0c825ac82f14adb91b255cec234cd72" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fef5beab293cc482e63947c7aa133778" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "f219b3aeb641094c9506d63d34f61594" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "afe26b0283ed633feaa3784374d866b3" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4f0cd14ab865c2f8553f79466e85a6ab" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e47e8214362118313253663b2c3ce43e" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "8bdcc2a37898c5b61dc2e9d30f7ffcc2" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "32e3d2650ce2fcb5ba7e8ebbcf7f8b2c" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "fe1013105c419278c23b765e981940fb" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 20979712, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20975616 } ], "md5sum": "9a3e24a308a77520ec39767e540973be" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 20975616, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 3072, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12582912 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20971520 } ], "md5sum": "4d899f886c5eb1df29d31ae4897fae61" } ] }