riczhou's picture
Upload folder using huggingface_hub
40dae8e verified
{
"metadata": {
"ParamSize": 195,
"ParamBytes": 16060522496.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1050673152,
"records": [
{
"name": "lm_head.weight",
"shape": [
128256,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1050673152,
"byteOffset": 0
}
],
"md5sum": "ab044d64a21c0b50372b39087584bd28"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.31.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "e83f1e2cc19089959c456b650f3198d2"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 1050673152,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
128256,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1050673152,
"byteOffset": 0
}
],
"md5sum": "9173e0a64e7a2afbf7246af0d61d0f2a"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "074b67ffa12db1f84b189b34b9f01706"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "130de9314f97a23dd50f391b2202f49c"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "273d15fa6b10b27f056ba451a292d63c"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "74290290ded4fab3209aa7323f851e44"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "6473a4ef236986c12915d4e5ae983b0d"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "e511945ca4e31a37fd5e420a49267683"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f98f42da50a5f87f2f01f00b8c94c141"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5f19dbaa0cfbdada8479b2682337c567"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "1a7d2e40aff4908a75b9eda89d4b62ff"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "a7d327377b578073545d51e0d01c99a3"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "7afdd4b730ee23c49cb32b0beff83208"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "38d47f9c02e6a7b414779287464c73e7"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "1238b1933fee5007cca7c0adeff08e89"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "7dc2985efd9334cd258e56f5424942ef"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "a1c6847ff24d43edef06cb3401e8346b"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5f50a02405c28c0a5d3d6627cfbf2ce8"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "b64e456f270c948f8467d182ab71061b"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "ed8bd7fa1f3c5d360685c1560754f3f4"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "836d5bebaf525b7535d08b8f551f49c3"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9a736b49ceb7d7750e073c10942d86a5"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "bf8878e4abac3fcc1053133561d621eb"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "4e45d5a5990d85e804ec47fd4c1de62b"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "c249ea2d99db8e31414b679a75b309e7"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "069b27afe17a1a3b898dae0328ad4ca2"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "79c6f7d8f386e769030d53ac05293932"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "cf4f65e16b33af859268b3662a325fab"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "8d86764287cebb9a9e3baa5bb73a8940"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1b0865540e183e98aa760575b63bdf88"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "0cda178ca4e22a56385ac0549ef45757"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "e5516292dd3ef234329f688b75ecd38b"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5f0369c9c3ae61a56eb74ca6b4b3d8f6"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fe3ccbfda9d40805e3a3087a3aca98b0"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "32f69edbed11bf4a097234262c5a6cee"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "fd349d55b440082f71a885698869fd0e"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "2866aee7238b305d1d42e41c1f18b9e5"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d663d2dc7e1f91d1203d17d005093898"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "925c2ab44db524ac8a59b3363bda2c22"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "1a8fa4fd2a4f3d5b9837ef6efc6ce151"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "4e6807ae0e9097d6ab0fdfaf89eb39f3"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "323bb70d863cb2c144a466e53cbe03ce"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "c7f321fb722164f56e34c5b43b2c4ea6"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "b3b5c0558a96f7517932b37e3e0f6911"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "fec6470984c0b3280a05af5e76059e26"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "abf3a9ea6d9f045b9c2c5e185c6474c5"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "233ba942f895e08928843a79f64417b6"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "8a01562bade836a614091d72ca4ce5c0"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "878d80c7675f15369704337c94ec10c8"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0343a1994ccaf3cf0a923061da2e2126"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "cbdaac3fa8a5e7a9778ee7ee59e25988"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "82a909c10067de8c593c24d4dbfe51db"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "4d9b157a5bedfa17fe03c5961928b010"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "40b6be3846b8c34811c7efebe828d743"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "27f032d257c560ad1db0ad1718c0713e"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "30a98b08c20e73ab141879f2dd2a0c38"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "760747b13020079ff659555e4febd337"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "65d1dde6c76d9f529762854aa3623e81"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "f3cc031a15ce0ef0e0076957ae2e71ea"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "f73685842211aab8399e314554b9c0b9"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "9c6f55b644e23cd9bf834db37eeaa27a"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "003166d50193c9d3c2c7c88990b6643d"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "3196db7d3c4bfd84419c6186a094fcc1"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "a2f51f69d620156b458ac8152358bf26"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "0f24d454dffc08bb510649f160e2250c"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1844e2eacfbda5edf23c6d6d3cefadea"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "003ff48334f68ead99eecb619a54776b"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "94d130b9f23bd832aeb921f1001b6d26"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "e6a47a48c543b71deb13c83792533b17"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "03917b40df2b4deb6ac3aae9d6102deb"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "d02130184bf405140af14435bc1e9a23"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "d8c017c803f5c847b44bfba581fd3803"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "de1e850cfbe3158996546bc21fbd603b"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fb81401942ac83b8c649f7df1f102253"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "a003022bd587b79322bf2bc56f940c05"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "cf8b74128e12061958317edb57df8b4f"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "c9f660956b4eaad4cf7b2cb96650ca0c"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b8580f8d001049726c59d42cdb71828e"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "f233cdabc9ee65792ebd6eac9d1d9874"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "b3e5717dd467c149539e38c66c5bc5b4"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "85c9f9d72884046c481786edbc0587e5"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "c6c330b693e650e5ab0e1c561324f6c9"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "331912e06cff408b4d0425f7a3e97d4c"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f386fefd4c982110922135655652d991"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ac19ed8bf3a33c98459116a83c7dd63c"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "b8a89f9821dba551444ae7d18b85fafd"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "2641b19a83e4d29d3f346653357f39c0"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "c358f9fdd406d0e40892546a565e3a8f"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "8422e2ed0a8b66d76ee6df505748a6fe"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "46f693e10b6ff3b90d2e3c0968fe2087"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "e33ff9e6be0654fa0f7e8c2444bc4295"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "b8e3e09f36f6a8208734eeb0f79d2271"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "52420bd814bae89e248cc5e1955a29b2"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "cdbdcd48c5b8374c2c2442c6c7270a2b"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "9d188b568c15b2a12dcbcceef31f11f5"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "19e1521d09db01626a3782eae391afe3"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "3dd1949840ffe0f40b054c8cfa0f957f"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "537faab6244365e91672d818f15e2c36"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.24.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "3d73e1a0cd759fd5203ad309e1e387ad"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "43e4de1dae3d005cfff0dc4fde9375b8"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "e5c539b8856e7f495aa1a7ffd5b85431"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9779dc4a784e935250107c6ab5d46741"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.25.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "e7d12480df8ccd276dc9c50c3b9231c8"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "fda30d80d4f98df483fd613932a3aa4a"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "27c52e2b46b89e5452f2ea82ed1d9227"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "88a047325b789bf72df5c46f9fb880e9"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.26.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "7ff6651b7722d3d7afc37f8269e15661"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "4ee71a7f1cc42eac5e0746e37da687f3"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "c679b63c5b85d3fa15c2fcbd49fbc48a"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "bb01183b273e4ac7626e02e9c15d50f3"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.27.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "28ba9167c2ae77bae8ffbc798baf4d9e"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "fb236297c897d1cd61904686d64ad57d"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5006bb6c3349112342a2d2251b933664"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1206de1cc36f390118834aea286669d4"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.28.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "48b211965442f9ae7d6160b63dcbf305"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "bfca698ecb180ae5d6ed4926f6cf96a5"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "53683c48f4ba912d5385478cd0fd171f"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8ef7f64fe2b708ffbb74d8a9d21e67f3"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.29.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "c8ade2308ced3a863055b4e93604935c"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "048ea17d590d4d44983d33399916f192"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "3b528fdc45d7119609d89dad205e005f"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a3e45c58a78945cad031d419255f90c4"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.30.mlp.down_proj.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "df06e6e191ec47c124fbc4fba84f4436"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "7941bafbff13f9a337e5132077d80067"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "963452db05d3fa0f9e790b27c36ce354"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f606499d17e01e1eac81c86949643504"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "9bd4db193283d565489b80c4bc364b6b"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "a659a0664be1b2712080d21f91c8da17"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6892044b3ede70fbb5ab24d2dbc1a05e"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 532480,
"records": [
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 0
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8192
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16384
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24576
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32768
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 40960
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 49152
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 57344
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 65536
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 73728
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 81920
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 90112
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 98304
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 106496
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 114688
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 122880
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 131072
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 139264
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 147456
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 155648
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 163840
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 172032
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 180224
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 188416
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 196608
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 204800
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 212992
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 221184
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 229376
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 237568
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 245760
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 253952
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 262144
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 270336
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 278528
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 286720
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 294912
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 303104
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 311296
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 319488
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 327680
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 335872
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 344064
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 352256
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 360448
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 368640
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 376832
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 385024
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 393216
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 401408
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 409600
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 417792
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 425984
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 434176
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 442368
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 450560
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 458752
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 466944
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 475136
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 483328
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 491520
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 499712
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 507904
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 516096
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 524288
}
],
"md5sum": "60f39f4290c24e5f43ccb83308e13bc0"
}
]
}