{ "metadata": { "ParamSize": 135, "ParamBytes": 2200096768.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 131072000, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 32000, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 131072000, "byteOffset": 0 } ], "md5sum": "eff73a0fd7c35d95ac5ee2b26e579d8b" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "1429fc4093dc297427a1d203e707a249" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "a344dc45356c7e4f600da19c1593c300" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "4cdb0f892aeb98c7437ce93ffc26f068" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "6a8ce1a0b8b1f429b074de2595baa650" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "bb69861eb1057665fe422716af9732cf" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "23f4d96019adbd875c8c59dec0d2c4ec" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "76c7bb449215a4c142bf34377cb9ce06" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "0146969afe6b3f5c8b1c723da284f2e3" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "bad7f4dce5e722bd6abe9fac1d90eff4" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "248d134f8a98490d7b4bb7049ff6fadf" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "f9ebe9f2714624fbbf8250efc1b0c844" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "6ac19ac227656ed2a9c0c0c4207a6e0c" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "dab1a17c6787ca0f2c97e7c4242e8fd6" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "faf5f505315df5591917b35220d5c4de" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "c2f11225f3c5733a1a4d184c1edf3c48" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "063dd74c0700b250c9ed1dc1893374d9" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "9213875dbdfcf3beafdacd7ede601517" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "922c6ebcebf9aa82429d7d8cc10996a3" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "aecdd343857e3aa80bb8c94902c143c6" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "aac6be0171e02618aae40545fc5faf8e" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "378996eebce2626f3e75e8cc1fb75cc8" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "8bcf35bd8fc1aca4e79f13c7eeef12de" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "59703751a89011572ba8b8cad1a4aba8" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "404ee845876d55ec540779bb5b6c34e3" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "052fbbd438265bb5a93a5084015bc992" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "f023907997c3ed547ae03bdcdefcd1b5" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "90f22652a80539b61d36867074f3204a" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "97016e4d3dbbdf58594a6c4f0068b87d" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "04a4e35f6c0ec7c2e6ae3e00796a5340" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "bd378eec1efe504b556676ba55a10fca" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "60b30fd02636aea5794d7335824b22fb" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "a768676b315d80ef29fcfad22479586a" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "0ce96d9d481118f7ac43dbfc171ecb4b" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "92690ce271707746793acba0ae9e6ffd" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "c870963c9079f502d95ed8a60916aa09" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "7af04b96dd3e71c5e3a1f44ad173bfea" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "1cac728a2b22e3b77fb5dbbd1e9229b0" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "4b5371f16710560fa21e987b817454da" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "cb4c8bf7627c3697ba73cec668f987d1" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "8fe785d8625614bc39f3ed1208e58474" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "f514dd7d4766f1c8a60da5165c242ed2" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "b13d9ae43b7fa845b48c5b6de94570a7" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.17.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "2cf9caca3db423106041ffe5cf949e53" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "1d4c5281fe2e612feeb25bbbc88bf456" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "c46540c97c546722e81d362b9b2053d0" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "92534c963098e40f8e494024f7cc171a" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "9b9d559b2302c551f2433ffe8781934c" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.19.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "7089659a826cdffdd85fc286918b3c0e" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "0b818102d48e4f8060cc389ba0d8c7bd" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "da376ebd90a6fb758ae26a1e5f1bb459" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "07983db1555010f4e29efc70ae893c31" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "8f144f9da86834343511a82586a630a1" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.21.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "d9e65b87963f1b12a61db61b5e22b006" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "cca1c44db48008d5c6c9ac8ebf8b4725" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 131072000, "records": [ { "name": "lm_head.weight", "shape": [ 32000, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 131072000, "byteOffset": 0 } ], "md5sum": "7a3246d2a0c89b32df828dd06944d490" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31465472 } ], "md5sum": "697def5fa1e5ab0181768796ae6277f3" } ] }