{ "metadata": { "ParamSize": 707, "ParamBytes": 58394173440.0, "BitsPerParam": 4.500051549843175 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1572864000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 256000, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864000, "byteOffset": 0 } ], "md5sum": "ec3e21d9dd46c9a04ca52e50b2554ae4" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 196608000, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 256000, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 196608000, "byteOffset": 0 } ], "md5sum": "7f8473d4d21a55c43622d82021695c80" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "b386620a04a68323886b6222dd73c5ee" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.0.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "63fbbb969b9503275cfdb084ad83a61d" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.0.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "b031aad87f5aa98937114d7439dcb673" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.0.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "75f7d6e3066b92c16d9be64060ec17c1" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.0.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "553fd8612d9cba2ac8fe146a15ed3400" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "31ea81a9a9af6f6c579164a492baa9e8" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 25976832, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 24576 } ], "md5sum": "967a7087a43501119721d0f0a160a244" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.0.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f46db76ecacc9bb963e431b978c511ba" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.1.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "93c6e7fd6eb21a7c7042364618b72e71" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.1.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "17435e9d2a3b3c514a18afae5f9a32b2" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "c011dbeef2a29546638cc8598c536889" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.1.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d8eda53030d27891d8e136f6f898104d" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "c480d050b908f331852f4bf0d2e24f1f" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "83ede47f592ad837027af82bd4218457" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "4174829a7ba615f44d43c54c71231a8e" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.1.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "5d293de9f28d781a61ecb31d36e3fa66" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.1.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "f54039e7904a116d703ce4ccad0b0353" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "47ae51b8b2b20aea057f10a9e7ec3fb6" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "96896490ef738044730e56a01746dce4" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.2.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "6f9722ea8f33a26157d2a0c06d384de2" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.2.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "af7eba9f6339a66c041889afb6c97218" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.2.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "c56c67af434c97fe116c07160ebc42a0" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.2.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "5f289de554b5b847e0c6a386e237e55f" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "0086df2ba54b37b78ec4b76a33fc6381" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.2.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "cd64f46407756bd18842fa8143eb409b" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "be5e81ab0b81bdedff0e5090cfc6d244" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "b407bf7e60960dc7cf16d14898eee604" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.10.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "c4f4fba5324b7736e63a141c536a1362" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.10.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "281ab8ad22c7ac979a1c788669f85f21" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "ba0b92448e925db01a95bb68e71106e0" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "f2b31655a8c2457a11ab4e6ace83bc5c" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.11.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "12014f5698b660199ff26e062b961f1c" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.11.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "e7ebcfcecb832825c021413968d804c1" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.11.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "bc8baf1473ab2527fb3330fde5666145" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.11.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "a2556b6b6c6d8c3bf1dc2439b10999cf" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "9268ff589f6eb2458c0795655855b687" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.1.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9461760 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9486336 }, { "name": "model.layers.2.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20496384 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "067b00df67ddaf07508ff6b7c0669bd7" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.11.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0bc1682f4fe5022a6ad4eeb0cd30fe23" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.10.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "9098f10f03ea2f6608806c19fe01642e" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.10.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "b6e945ca5bb11bd29ae9c75b0e777cb1" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "aee5071b20c9633be1cc9b42fa37bf04" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.10.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ea9371432b276a44caba0c8186a05524" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "ccd9dcf2f7a655890c2abd74443bebfa" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "ead19ae41fa6354727124cd9e34fe560" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "8e2377317b6da59028d3eac3c0ca0a44" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.9.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "de50406f88042307a5fff70e4bed8e59" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.9.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "8c558fb634dd1890b0d77d1cfb94a270" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.9.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "67e53ab4e41adff89710527d86506ca7" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.9.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "746421d78204ee57b6dd45ff01c519cd" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "2a57f414b46fb462447c181bdaa70707" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.9.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "093b238d24e71d73a47ea3999e53c7e7" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "b85e3e09c4f68991186ca4da74f5424c" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "600c00eb02f74988493c94830aaa3eac" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.12.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "addeb3aead073976c9833ecac07af09e" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.12.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "5ea451fad9506ed9ff695f551da9b2ed" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.12.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "3af4404c2a047d88559a36b16de1c2cc" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.12.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "f321921a9d0cb9426388b85135999d3e" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "b30fb969f740786bff25e5caf855df0e" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 29933568, "records": [ { "name": "model.layers.10.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9461760 }, { "name": "model.layers.9.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20471808 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 } ], "md5sum": "b64eded009316e37f75fc4691b15ac03" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.12.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f58e1189cc3a111dc224a7d2d31d37e0" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.13.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "2ba6c289b735b70d14d2aa084185db47" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.13.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "1b78a5615ddfb1382ea5073ba59cacd0" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "cea12e1bceaec5663762e2f710c6daed" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.13.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f3d86f199bf9f50ee089b780b6cbcdea" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "709a9baf03c909babf061189a29a0085" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "0b3a0192d69d1ad1957f2bf5909d22fa" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "58a2db60b301dce3b88c91e5cfe822cf" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.13.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "82af38122b95dd45907efee9e834fc8b" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.13.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "b4df25eb98af0b13adde61f225ade7dc" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "74727e35cf089fd290692eb28256503c" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "dd6ad77a41317d19a4851a39718ffa80" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.14.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "f13d1bcc939aa4128e16e13c168e2e16" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.14.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "74ef07f73c66d031a2316d9bc24d32a8" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.14.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "3f64cf05cc4a58d8d42e1b10e0812cc7" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.14.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "3d79c0be2b190d95add53c787e25f9f0" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "6017717d34524831e56c08ce1d54af58" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.14.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "16a2c42f711e57e27a655db6dbdf0de1" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "dc7ed3108d17135b23aba8035b5bab7d" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "0d777bd1837aa34af01f35d812ee9f15" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.15.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "34fa9e6aed6a5619c6aecdfc382b7209" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.15.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "244744ff41027c41395aaff8a8e6d9d5" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.15.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "58f1638c6bee1221ad9591684d7f0ff0" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.15.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "310c2b53aa85f695ccfc5e9cab51e519" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "018d90c7bb45699a567122a4eed4cd1d" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 29958144, "records": [ { "name": "model.layers.13.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9461760 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9486336 }, { "name": "model.layers.14.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20496384 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 } ], "md5sum": "56d43da3c2453bcf52fc9894ad072d80" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.15.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2e1f6f72879b0c2eac772653ba6b6418" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.16.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "dc6e98b5ae7134ed21d4f8685127a8a9" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.16.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "4a52b1b310aaf6185ee0d34d4349e4b9" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "6b9f62ff2338981d8b7f12dffa450cab" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.16.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ed9ddd86cf9a3630338bd8bd6cfa8d99" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "ac37a97eecbdc010a28bdf007c58e161" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "7279ee0dc2d1449c4a775b9d72471ced" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "e4b6d86b2c1a924965fb4456da80b63e" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.16.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "facadd11b2c986a386a7533f1f649860" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.16.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "538028970e0b2d7c361d4cfd38171cb9" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "a10b5879a4620115d533bfb574d61c7a" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "acc7f74e34f3a78fac6f4b4d61614bba" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.17.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "663ff764c3af8810076c5cdb02bf57b2" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.17.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "54a8a93bdcd4317c5b99fc8642c1e2c3" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.17.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "a6ffacf942e183c68853137f5f646ab9" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.17.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "a600aa3d233e8e75a357963bc854176a" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "7fd095e051a9c758078d4b997782ed92" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.17.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0a777217a3a259590131fbe7d436d104" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "0cff0c652535bfcccbe565e97c410280" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "239e54eb33ebb2547a3438c64006071a" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.18.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "3d1fc49da8ce01dfb5cbe452eaf7ed78" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.18.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "e3e1b06c70a54ddf8b26da05ac79a67a" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.18.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "268c1a68ccc68ea31523edfe6b160137" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.18.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "b419218f8dc2527ad7fec36caf00024a" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "2ef94f20c0df30632c6e6f26636984de" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 29958144, "records": [ { "name": "model.layers.16.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9461760 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9486336 }, { "name": "model.layers.17.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20496384 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 } ], "md5sum": "b1439f37ee1fbe25a9047c18b1f69379" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.18.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "92cc95ebdcd9d0871df369fee6e8d712" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.19.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "6d722edf4b6808bfd1c28ea88b4716ed" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.19.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "09c77b0ea568760c38837c5f6a1cd2ab" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "93c0147ec37b8ea2c3a8cb83aabc175e" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.19.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ddf319b6e14e4d1ff0ca7cfbc1ef9695" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "4c50efc4988e644c17e8205d6bdff763" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "c8e389d4fa7f753dd741670a08ee3487" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "e5e7c28a4452c5c2903478d3c1b81553" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.19.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "897f1866f1b1e120cbf1daa28a454307" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.19.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "977a0db30044f2e1ded99e196481467d" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "a058f3bbb1eb576614becd2a5e6da47c" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "e026934393042ee7bdaf55924d6a4c0a" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.20.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "7c9c31f3b253a00e41918da3fdbcc72c" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.20.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "211dd755031f864326c91b3486e3c1d4" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.20.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "d6e4c05708300ab8998515cb1d23a8ef" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.20.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "96186e7467e0b1dba12457ba60cc727a" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "acaeee20c428c03b34a3d0a41cfb06cc" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.20.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bfcaa3f315a8ac10c6c9b8067def368a" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "aa3201dba79a7a8cf46275b0873b0586" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "1caa371db867cc2f2e5bb02534b58417" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.21.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "2f96828e86071595ee1f45feb0de2885" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.21.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "5805268a28657764c8c5c9d226177ee9" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.21.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "6706ff2f22f471c5de0f838f5595ff3b" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.21.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "dec3c3439f93840b681bbf38afd77177" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "edbf9683ce19feb9f2ccbc03920d4e66" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 29958144, "records": [ { "name": "model.layers.19.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9461760 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9486336 }, { "name": "model.layers.20.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20496384 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 } ], "md5sum": "d6d2a853982d08d2382052845055027d" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.21.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ad0a40c1870c9e64f01a74c5a302e0d6" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.22.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "abff5f64e0546aebf2b8629739a3238a" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.22.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "2ee811090b83009f41654cbd11cda7f4" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "a550fecbd732181336b051ecdb365e47" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.22.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "fd8532ff0f275c6f977cb60bb619fbf4" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "8731465c7fa627d4cb4439fa24f69859" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "7aa5f7201b3b4cd195d92a654541cbd1" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "ae5947385511e0a70248f916e3fffed9" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.22.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "4c51b537e45867661cedf535ed4742ce" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.22.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "50984c8779f21e28c50622cba9d14a78" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "88723d3839901e1bc1f9f32e645f9bad" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "32feda2f0fd8d1b7f20dbfc2003bea56" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.23.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "ea11316f193afac4ef3bb1775a80379a" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.23.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "a7d1a3064c02c936c57860ce6cfa7839" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.23.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "6f364102a7116550da37bd8a06e9e83b" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.23.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "eadd8104c86b91eb64b3aaa281927485" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "9a8a2443b7e680e437b0a1e3bb78fce3" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.23.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f7990fec3c6cc1e87ba20c334d9c21d6" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "ab019ad6ec2a8cc2f2533da128201540" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "ed55c839b3b73a68ea7abd8c80fbd757" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.24.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "fb72180ff9a01685d79f1de086aca226" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.24.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "746d7ef977050c92f0aad0f70b84e800" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.24.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "a5eef9601587704fd8978ca31405f9fc" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.24.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "3fe53945bddb5e7f13a9cce8e9b2502a" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "a2ea17c00453b8f60a3bfaa1108e86bd" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 29958144, "records": [ { "name": "model.layers.22.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9461760 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9486336 }, { "name": "model.layers.23.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20496384 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 } ], "md5sum": "43cb47a94f06dc673303661d1d259f41" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.24.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "07ce162f1b66ef6fd862829d1258f702" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.25.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "fe6b4ce20f089cd123890a7df5780e93" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.25.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "39d4879a63344000e35e5495872e8621" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "cde77b645aed75dc1414706ba1e9d31c" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.25.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e11d58a147c67f8460ace4a93bd7f5dc" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "2d0e29ca4cb253c75aeb150755d1bdb5" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "5f328b90b1178eb59e93dd65981804ff" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "8fe205ccb41315d06f5175431cb37dcd" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.25.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "e0376daca6b247c7457102f3c6df8bbd" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.25.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "c081d720feee12ad47c5a24e23f201fa" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "a11558595aba80651117cae9a6c9240f" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "2696b13f85b90c83815eb5f8195116fd" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.26.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "3e2e86d35468e9548076f1aa021b5ccf" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.26.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "bf5d68e283fbb6655749a201db2194f1" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.26.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "b9cf51c41e081c7eddf5ff97971ae52f" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.26.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "25dfc2a80c1727c353b3c6e558817fa1" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "bf2d6e07f9f11b0108b39e0474fb523e" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.26.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "47042032ec8a5adbc830b7b7a6a30c80" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "c46f570ab76ac2ba000431c672319626" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "1685ac905edeb3dcfc7b12d12dde4455" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.27.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "2a59f6b72002d0e7535eac11ab9cd4e4" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.27.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "dedad700407a7b0ea1cab6adfb3e508f" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.27.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "a47ec34ce816600f9043b75c25432bc6" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.27.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "0301acdbfc7d2788602785548bd1d505" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "f2c983673363dbda3ffce2ebc11e73f3" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 29958144, "records": [ { "name": "model.layers.25.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9461760 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9486336 }, { "name": "model.layers.26.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20496384 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 } ], "md5sum": "462b66df7b536755eb357508c715c01f" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.27.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a6cca03b84c1a51e47ff941c35f25337" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.28.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "40cbad82edbdc6a5009ef92e833d097f" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.28.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "5e2876dadade116f332cacc02b6147b5" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "57b6e2e02af3a4243cdf4ede1fd1fbea" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.28.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "22879f161ba3091898746d1ab6ae7f42" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "504829fcb9f612e147abe6cef3cc7f37" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "5d705c4fa9b83f8d0f7d3e959ad8146a" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "afa06629da701c601438454d98ee6cb7" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.28.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "9841a00eedd2c955fa1a3dc176d6abf6" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.28.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "304254181c4521ad6d606d4b39268d7c" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "84813e1a219466b88c70ffb75d5b2b11" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "c1ddbb40fe8d1a5b75c46f2f20c6c62a" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.29.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "79317253d213b73b4a3a3da637e7e6bb" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.29.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "f97e4e0a4d7fa8cbb461647b60c839b7" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.29.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "20e1871c096392e8eb895c917c82fdb5" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.29.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "d71830c5f326e0d004e2b2d14422899d" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "c338bf88561e61117055e03418bc88c8" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.29.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e8859a9a0ae4f181eeb3e7944e6e1a62" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "491314408d19cc403b3ac0ee2153737a" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "3dc760aa520344215921d84f95f426ee" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.3.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "d2e964571c181cd196d6899ac554d390" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.3.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "3a25926b72fe5a166193dca0564ffc97" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.3.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "64cf46d6a395b851915668d8dfb7e8e8" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.3.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "4c31a7ef1ffe706b8e97a8bd89070b05" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "28cc3e6730e7995188b987b8021fa1ae" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 29958144, "records": [ { "name": "model.layers.28.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9461760 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9486336 }, { "name": "model.layers.29.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20496384 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 } ], "md5sum": "3d2b6cfa1cd50ad6c6535d3fabfe5b77" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.3.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "6a8e49b66f828f88b946b8948b39998c" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.4.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "ee9ac03c0bc8456c2e1ad21378b89d65" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.4.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "b94bb40539045fe1f2b40995ccda86fb" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "62c7ba308242db5dab6128f274709bb8" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.4.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b7f64f96f8aeecddd7af0ee26b8f722e" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "547f48033461ef06eb60dddfd79f414f" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "3225b904d8366f529ec46a72cee9385e" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "5a3a437e74ad65d3434abe8cb49f51dd" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.30.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "636edc4d5e8d72c74475d77c5e594ba3" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.30.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "0ad548e726e63840376f1eb5b0b097a8" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.30.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "9d6eb955c6cc1b73b3244c75b2f147f4" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.30.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "fec40f17475d224417bed31fc9970e67" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "b35ffda485966e9ff243ccecf34a4ba1" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.30.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "24ea39fca537ac171034193392f3b27c" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.31.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "e0a1637ed73a9be72b84ba82b1e0eaba" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.31.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "e799156c4382c3924f71f83bb404428b" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "f76e151e828ef5c5d5fbadbb18307e87" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.4.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9461760 }, { "name": "model.layers.30.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20471808 } ], "md5sum": "b9a9c4f1d709a45273b8d4f08eef51fa" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.31.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "cb7eaf48c7fd67d004ac43fb5e207d90" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "f73e10472e7ef7a7dc8a31ba15a6c51e" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "a2b6e26e2073654a024a56fea0e09d75" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.31.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "8af35cde65993d30c3d228af87caec7c" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.31.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "7082889a891244f4ae2838025290f4bf" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "1bf619a7835bba54bbf3597f3ac88c35" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "7c3dc4dd1be5d07f41aa717810ba8d38" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.32.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "a9c8896941bc42fd33a6afd09ffa8001" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.32.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "beb91fcc28cd0149a8c73adecd7939e3" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.32.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "b583380b2879ef4708f928c269670374" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.32.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "4fa767b8a9b1d30ff4d4f4d3b2944373" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "12f3e69f3177ea44698ce11381cce25c" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.32.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "821fbf6e7e4c2fdc9293df9ea1e670c7" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "bbcdde45125753fb51216bbfebc2eb83" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "0b542fc9ba3866070d42113cd5cdfed8" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "6a121504ca3ad9aaca2f2994772a89d1" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.33.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "af4e0b97045942332cd8c1223da5e62c" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.33.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "48722335911c07189b328bf60df92acd" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.33.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "cd66ecd873b43b9586596d2a5bd29e22" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.33.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "5e04cf9f744802f483148adfb8f625b7" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "fc72da85b179253b492d3ac0a7a8e1e4" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.33.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "86e0f35a2383d3467a0400058802a28a" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.34.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "c826afa4cef4122e236b1722beabc8a5" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.34.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "b65640f813fccbf415bdd7c88eedba9e" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "eb582a96c95c9e411dc804b14e69fe8a" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.32.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9461760 }, { "name": "model.layers.33.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20471808 } ], "md5sum": "fdee6096523b1ba89a1556ca9db532bd" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.34.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b3790c836ecc7f42f5215d6fb522cc99" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "58a4793a5c276654edb55abc9ca68e41" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "67e1991564eaa5fe4f870125f065dd39" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.34.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "d8a129ef0c75d908d5747e2dce0728b5" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.34.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "41818ee01ae1c415beb74261470fbc2f" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "b06a84ef7f569dba6c20a34417c187ca" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "552ee07c797059d6159846eeba30ed11" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.35.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "06214a15dca6b3cd0f5854f5f3218a2f" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.35.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "013677fe051a8244e513a62e57954dd6" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.35.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "16e7546cca17b0be3d17d7276968c42e" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.35.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "4a80e46954e2233913482105abd0eaf7" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "4ba68b534bfc7f34c266b36dcdd8d512" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.35.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "733d0022bfcfc10c5a135492e4870078" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "2d8561d4da66d19e2f1b8bc7ed118915" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "2792b2854afc8b076f424c8b3f9db042" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "761bbee468e9f19282f91a0c450d1774" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.36.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "57e5dc1d5bf9e1f61e6d72df484bce52" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.36.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "16bf98100a7deb1737ae6e62c9f8a0a3" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.36.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "930c0912230876af1dd850ab481a5331" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.36.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "422c5e35cb818386cceab5e67caa323c" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "cd6c9d67e17b0678b81492a8a6c16ced" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.36.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9213635957e5fff7fe3b6f5d9cc233de" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.37.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "27c5f18b723c1a69e0abc102498253e8" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.37.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "d380f3a1d5cf67496c6e5847f1ced93a" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "c8ef23890d1a7f5454047c46e145772d" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.35.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9461760 }, { "name": "model.layers.36.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20471808 } ], "md5sum": "3a2e2fc756869dc306492b120634bcfd" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.37.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7dd0cc7501832c7cc1955e12090b75dc" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "01914ca6f760a351fdd479007c027d6e" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "e272a30c99d88d9d0ee25e0621f36de8" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.37.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "4e4a41b5419d9ca6ad695bb8b533530e" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.37.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "86125340bfb512352935f78f0f23a789" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "323b6f6d066ea6aa7b74bc915722f64b" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "6150b8bea9420c6b9836ae91288123fb" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.38.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "a0dda61afacc19e7bba18d094ccb0371" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.38.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "4a3330cac1c22f256ed55a7b6ec7d27f" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.38.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "41e0bf2f651a0eddaf02564e674cae98" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.38.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "6cbf19fb4eea022885103e3ab1388ddf" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "902391c708f29ed56430ace0de2007e5" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.38.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b9f063ef5e259e8003d4e44e51ecac41" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "9a3308fb15891fbc6c52ef676b496646" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "5078bd0ef4c2162019cc8ccc100ffb4d" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "c9f100f19e5c8d5405dd693a77d6e9bb" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.39.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "e915aead37d8d0f59eedadcd3b3e18df" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.39.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "16a3f36860a6b64ff1bfb9a0f0dd7ba3" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.39.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "dac704a2b9321c3ebc7b37b220a459bf" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.39.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "dfbd6cac746ae26754d7571c8fc0bda3" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "04ebefb2db9de8d52d741f4e3f339813" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.39.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c53e4498af24a081ec3aefc8b65a0752" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.40.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "8a037a14fbdb43a46c3288fca97eb170" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.40.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "2aeb9990465fc42f41f6b4e5ce8bfc39" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "055c6be23675ce021ac4b4442a77dc1a" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.38.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9461760 }, { "name": "model.layers.39.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20471808 } ], "md5sum": "e0ca62e5e9a6fdaef96929a7c8147099" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.40.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "eabda0606507bc28a78e8dcbdcfb86cb" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "9d33137fc6483b01f5b06d35e26076a9" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "4f6ee53184f7051aa4230102c19da7e0" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.4.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "1946c68b5999456f4839b25daf0915a4" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.4.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "c5ca60926bc0acd879e34b48f90d0695" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "94c860761bdb35bab5557be25bb8ef5e" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "d7e23a2cc0f79b8665fe0f75e06f9956" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.5.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "7e68395707ab66621037e60dd42c491e" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.5.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "cf9afcf20774be6176b9fc5bf80a8e1f" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.5.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "d97693955b534197ba7b33d45b674de8" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.5.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "093653e8f65e2273813dba1ac84189c2" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "4cdb17cbdefbefc50f15a93bc331875e" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.5.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f18f687f1f7325efe029587557d6f042" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.40.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "41bcf4a82e064c40d748eb281bd7e99f" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "3617274f0d984dc3ba1f15c9fe9ba92b" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "2d0212569eecbd27fa0ee29061aa332d" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.40.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "f34b81a2b33f78069c933b5344d1b8ad" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.40.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "eaf680b459d3fe1c575206f4dc572bfd" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "23cc0d056d493c623b46044346121e40" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "f668d22b5b7d92a166b6a5b8fcd1fb57" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.41.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "ef6f84f7b168a70ace6d96080ad081c9" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.41.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "560357686bbc89829ba022e3c51b3ef6" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.41.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "daea47b41b3590f1a7ab8559b5c0fe61" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.41.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "a4e40ba9e841745bab45c97a8a432e9f" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "3260b5d5756d685bb304ce4f0f9c5373" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.41.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9a5f5c90fa9b0dafc4beb75755200393" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "5eb7093fa43f5dd845516dd23d343ae3" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "998119c1a588ff880f53dc755e5526e3" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.42.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "2068c3a37186c9338a2fc27b33529706" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.42.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "74c06fa9dac3c9d2c56a0008b6a6776d" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.42.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "939a9175212b3343df6afad686bdddb7" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.42.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "fb82383b0a7d7983e3961824c8c98b42" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "a7317192c2c3ed9ef36db190b343c8e3" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 29958144, "records": [ { "name": "model.layers.5.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9461760 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9486336 }, { "name": "model.layers.41.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20496384 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 } ], "md5sum": "25a2ab6476e23299131432ecc8f066df" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.42.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "1d07622606d9f74f6316c79a347eabe8" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.43.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "aadd32888d240c898af1a4df2f314a88" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.43.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "ab565f20cbaa84be4fba4c7e3b7b76f9" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "ed151471e41abb00d3adb4f3691a9a06" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.43.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "232c4edb3848f05696a3a581da59d943" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "507c21547e318b8e81955b510fe1adee" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "76d49795e2692067a57c54f175a4c0ad" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "baeafad621dd744a78fa60482ff3f45a" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.43.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "b22639b963be117f6c64ca87a3cc762b" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.43.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "abf54024ba05d614539472860ae2b1c9" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "26fd6520c5a01339c64b40c6e764c1f6" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "9424d1104408be407d6d530633ccce83" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.44.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "3bec24f753e5ee10f6631dc70f9e9570" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.44.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "ccc2d4d2949a0f4a0f4eed64ff911939" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.44.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "5bff97807714a5cfe4af8be1eb15e860" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.44.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "e1627960af7488c76fad84a040699a24" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "f895ca8d689814c2501c884bce1fec11" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.44.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e1beee862288032a0ce321cd95a5a27e" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "87e1a0b23d022d3f82e386ce918ffe13" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "e75543c26e2406fc1601a8f81a47a8a1" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.45.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "88019c293fe2ba25bad0d49e58f549e4" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.45.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "2a3e709b60654792fabf8292b125169b" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.45.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "caca1d6d30a7ee769ceeb2f18624f6b0" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.45.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "db09aaea1bd5e99f32947c0286ef8e42" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "cf481dec12c0edf1e902c72ea15e679a" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 29958144, "records": [ { "name": "model.layers.43.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9461760 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9486336 }, { "name": "model.layers.44.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20496384 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 } ], "md5sum": "8a2d8e395e78e03bc1aecc94a081ce83" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.45.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "1efa314545ae07cfb489a6626fb8e12b" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.46.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "9ccd3514808ffa5640c176c928aa6b40" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.46.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "a6b803a6bd8772abe5d1f7da7889ca70" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "1f8fac97a287a3f2e5d4b58a3288d1da" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.46.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d64d0c220a7f3e11466a42180b53f688" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.45.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "792ecff8f70ebe5373c9377ca683a22b" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "0f8119ddb4cfac56c3aa143e2a80f2c5" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "e95b73fce557947cbe1bb3ad07b4c336" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.46.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "ab56fea06d8b5ae16fe851ed7c0e7799" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.46.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "0eed1362f0c4d5f9e7c0eb44aa86d5f4" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "6cf6758412b7692bcf89befaac158a94" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "671378bfa1bb85b7b5c677333f2531d8" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.47.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "2ac739d8a8050edd24196289e5557d8a" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.47.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "d32a890d2d2611896d097bad532f5aaa" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.47.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "86a4796647d1a941e44269284efb7d8d" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.47.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "07fcb3e0c852413a5cb36212ea82de8a" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "7ed7a76fb1f42b05513e2a4515d05e60" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.47.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "6d6d6edc259184cdeda1437183a3c6b1" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "2477b73fa5c20553abdc0643c95ede0e" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "6929075f09e8b289084857d123b15d92" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.48.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "8e775f6ed5f09a9ce882c6f125e2689a" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.48.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "67c4a2b4a9fbd62c83441cd591df0e6a" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.48.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "460c47c7714e62220a4c8e2c919e3342" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.48.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "89ea78df9854e1fc5df712f9c5a30f24" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "4c2db0987ca722473f86177d730e0a97" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 29958144, "records": [ { "name": "model.layers.46.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9461760 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9486336 }, { "name": "model.layers.47.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20496384 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 } ], "md5sum": "9fcd05d7689788f6a35319739a84d1e6" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.48.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d4430ae9d4baa3f14798cfe8a187b174" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.49.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "6db99860d7f15fb1e79078834ffd13da" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.49.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "d118f994afcf9da4365bcba508500c16" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "11e85edb6856ad787969744f46258748" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.49.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "11e517be43057f2a84567f9d91b6f8cd" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.48.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "7b581f03d7c11d52d94ad7a272b9caa0" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "ab72770824f9a110e63964a0f6b375b4" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "3529ae39ab077bba3507c586dda784c5" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.49.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "5fee79b9d542cbc435a407dd5bad403a" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.49.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "7d539d56ba7d9be6b8a57cf12fa34b87" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "c1a209047da42909f9a952809b6a43a9" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "9fe1eb771200819b29e98d959496120e" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.50.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "fb5e2152c09a5a8934a7ef297097344a" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.50.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "6553f1b2e1c01f322edb5280f80fb509" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.50.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "1c39b745b6a96a0a26fb5dce54b8ad69" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.50.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "b3b7fe018371f111cf67b84bbcf594ac" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "f6f4f62bacb1c254e5829ac64ca6a303" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.50.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "71d8f8a04954f34f964b1f0700415c60" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "1885e703a8d45e11ca2b4118e7201e34" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "b8267e6ab37daec06a20bfdaa5449add" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.51.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "02506ac1efc5ee0cacf69471e24683e7" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.51.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "a98e94324c474dadff5224d7197011c0" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.51.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "c4a4aa36f568dc607a32e7a4a6934b14" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.51.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "b5fd70817dc914f8999ca65b262e18f7" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "36bfc19c1130b7946da59bb071c55860" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 29958144, "records": [ { "name": "model.layers.49.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9461760 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9486336 }, { "name": "model.layers.50.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20496384 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 } ], "md5sum": "133a966a3b83d4e511860c61912aa940" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.51.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "24c3687ce4e8260e797f67c1e1407772" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.52.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "863e1e2a6b2811438ec83c8d25159c9e" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.52.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "dc7110dc0114855504d389766a7ef27d" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "85e558ed89f101064cf4120f7ac35799" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.52.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d78d33ae729136c7a4786319f1382e69" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "f9cbab190a010bd223d35dee1d5e207a" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "141b1bfb3c58bf5b933defbddd9a66e0" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "a3d669b18876cee06e9a1e53b3b8f2e7" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.52.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "8dce1d1dab439098969324fa9b82ce36" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.52.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "a03bc1dfe0bca244c79f9873a3f13b50" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "ba1fd1884985987f8402b2017ffff780" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "f18d1174ccb98bf6f2908e1213266a4b" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.53.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "6cf5f7697a91e8a756d823654d24cd7a" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.53.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "5a1ccccd4132ff98b1fd03f7df1e7f3c" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.53.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "ccf531e0ce593314bfe3e660d603df0a" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.53.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "3d80f75490c360202f8989515dfb7657" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "f93d62ec4c9e66bf8d0d1bee1f570f0f" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.53.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9db325d6500aad2fc7b03544685f08bb" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "7fb796bbe1c3dd90f39043aee991302f" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "4d16282616b64e064d4f88bf26a6ee0b" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.54.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "dcfb59c8649eba1bbd43c300142b767a" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.54.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "574cdbdac63f4bce2a36adf6d1bac6b4" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.54.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "b5795336e1f44ad239ae7d6a17f1854c" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.54.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "e37c7f400db490cb07f976dd40814a32" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "25de9d8fef1c5de7412bc35c21f535b4" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 29958144, "records": [ { "name": "model.layers.52.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9461760 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9486336 }, { "name": "model.layers.53.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20496384 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 } ], "md5sum": "aaab30880c644cba3238161c157e624a" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.54.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d2d2d19d8d36a324ab42b9f92d6cfc25" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.55.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "f8937c0f7708eaa5ec8199c833582aa0" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.55.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "656d2c40f7481df75b278c2d5881ba46" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "91ceb71505ad6f6d9ea4638ea8fac3cf" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.55.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "143c328d64bde3972adc9fffc8718c46" }, { "dataPath": "params_shard_456.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.54.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "3e7451d1cb1068f044cbf625749bf17d" }, { "dataPath": "params_shard_457.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "f226999e7a99cd26e26557a36ff9cdb0" }, { "dataPath": "params_shard_458.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "1f99f0b03f69dd92f2db0d086775497a" }, { "dataPath": "params_shard_459.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.55.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "4b7f09d8d61e0dd6505f2bef6aea343b" }, { "dataPath": "params_shard_460.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.55.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "27b9078118f337f3f4d027dc88522500" }, { "dataPath": "params_shard_461.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "bd22bd16a48aef7ebf1920a9d5b6c007" }, { "dataPath": "params_shard_462.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "e695c64aa41f2137c241acbad5904c8b" }, { "dataPath": "params_shard_463.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.56.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "ee91cabb08cc843d0fe02ad358b9225c" }, { "dataPath": "params_shard_464.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.56.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "9782ca3d205795207a0d102822ab91e6" }, { "dataPath": "params_shard_465.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.56.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "cd03dd52846d1031c55bb3b84468067b" }, { "dataPath": "params_shard_466.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.56.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "100f76964d0ea01c6940689805d212ae" }, { "dataPath": "params_shard_467.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "24717e4fa232edcd91035e028e270e4c" }, { "dataPath": "params_shard_468.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.56.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7e63396fb6f837f90f633b9fb49197ed" }, { "dataPath": "params_shard_469.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "5c33842b66a5ec2dfb2c4624c635efe1" }, { "dataPath": "params_shard_470.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "f02259e4015843f2bf069d0f430aed89" }, { "dataPath": "params_shard_471.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.57.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "e5ea0dd672cb7cf69eca26c2c8d3c8d9" }, { "dataPath": "params_shard_472.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.57.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "4eda25ff3065877f6736c75020d4df37" }, { "dataPath": "params_shard_473.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.57.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "1c6b689b55a8f7f2fdf2f26bdafff307" }, { "dataPath": "params_shard_474.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.57.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "c27ce626d3e3962b0ae75231ef020697" }, { "dataPath": "params_shard_475.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "1f0470ac2182e1e8b4febbb30508bc94" }, { "dataPath": "params_shard_476.bin", "format": "raw-shard", "nbytes": 29958144, "records": [ { "name": "model.layers.55.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9461760 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9486336 }, { "name": "model.layers.56.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20496384 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 } ], "md5sum": "1f28762870df1c52252d2cbe6eabaacd" }, { "dataPath": "params_shard_477.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.57.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3b5cb8e191a6db26c121d5a25b708137" }, { "dataPath": "params_shard_478.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.58.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "0cf5895820a1ee6de792fd86bb1d1351" }, { "dataPath": "params_shard_479.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.58.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "1a238148fe51fa24d9ce8e93efa137c4" }, { "dataPath": "params_shard_480.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "c60436cdd0dc67f751fca6422e5d56e3" }, { "dataPath": "params_shard_481.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.58.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "8037a7b4a247b0f535bc7b77b492f50a" }, { "dataPath": "params_shard_482.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.57.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "d489a2d80cf97ae7f6cbfef05240520d" }, { "dataPath": "params_shard_483.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "279b461c741428cf6cf50a31f9fee116" }, { "dataPath": "params_shard_484.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "540742f6c4ca04b38519b23c88106869" }, { "dataPath": "params_shard_485.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.58.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "b4d5a3fefacd71b2182f27be7ae7955b" }, { "dataPath": "params_shard_486.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.58.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "a0ff855f3ca463ff1cdbea46a26b45aa" }, { "dataPath": "params_shard_487.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "478fcc39b483e29a333f78fd7c06d53a" }, { "dataPath": "params_shard_488.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "3052d7f0bf79e0a902e94386d8024980" }, { "dataPath": "params_shard_489.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.59.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "ef0f30584bbe372308c492e9d5903157" }, { "dataPath": "params_shard_490.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.59.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "913d0c8de0b073dbc688ac94cedd8f8d" }, { "dataPath": "params_shard_491.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.59.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "aef2ff545dc729ffa18c5b50fdac4142" }, { "dataPath": "params_shard_492.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.59.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "245c77d394f1d4d7a5c4ba267ef3e047" }, { "dataPath": "params_shard_493.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "56da5b036dc412292a68b6dd29ba2932" }, { "dataPath": "params_shard_494.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.59.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "82c036f61c97409f6be0090de298f987" }, { "dataPath": "params_shard_495.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "c0885b44efa72f20b4798d38bf07f7dd" }, { "dataPath": "params_shard_496.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "d5aecc2a16697a3606cd6d2ae68cc8c2" }, { "dataPath": "params_shard_497.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.6.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "cfcb849be40c42c7d8e40aaf9a18cf3a" }, { "dataPath": "params_shard_498.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.6.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "2e88dc4c4b643098b1a7eb3c139fd0b1" }, { "dataPath": "params_shard_499.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.6.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "ce794544afffe1dc0870f7825cb2a443" }, { "dataPath": "params_shard_500.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.6.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "4c5ce2597fc5ccd1e7ad6085febccd88" }, { "dataPath": "params_shard_501.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "8774b3b95784113f663a63c6a63c7db0" }, { "dataPath": "params_shard_502.bin", "format": "raw-shard", "nbytes": 29958144, "records": [ { "name": "model.layers.58.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9461760 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9486336 }, { "name": "model.layers.59.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20496384 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 } ], "md5sum": "2e9e15f42129396c9762a193643649f5" }, { "dataPath": "params_shard_503.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.6.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4a01810fe9e6618ac23675502535ccf3" }, { "dataPath": "params_shard_504.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.7.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "1a36ef21024257a44aadd3f7d59ca169" }, { "dataPath": "params_shard_505.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.7.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "cb24fd728c67f0ac0c54101e498852bd" }, { "dataPath": "params_shard_506.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "bb90765cfbe106de8c4a74cf93ccd269" }, { "dataPath": "params_shard_507.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.7.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4b303c1fa119863ba8bb7c80ae9177a7" }, { "dataPath": "params_shard_508.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20447232 } ], "md5sum": "992749d0429e39b4533f8986d8b94091" }, { "dataPath": "params_shard_509.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "8c9127168ed527f51a09deffa4be63e2" }, { "dataPath": "params_shard_510.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "5b0b81a4158ad4f2212af8f3f12e5501" }, { "dataPath": "params_shard_511.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.60.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "dbf4205a31594d0e0e8a75618ae9c9d7" }, { "dataPath": "params_shard_512.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.60.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "3c79de4dc413c12dd41e14ebaf0689eb" }, { "dataPath": "params_shard_513.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.60.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "fbc49432582556de296143eff64421a6" }, { "dataPath": "params_shard_514.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.60.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "f66f2c416f5cd81e1f8689bb5cdba484" }, { "dataPath": "params_shard_515.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "ed22974d964945a660cc1e878cc7c66f" }, { "dataPath": "params_shard_516.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.60.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "769294211d6dcb6c784f1219217a7676" }, { "dataPath": "params_shard_517.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.61.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "84ed064d3e3b94c114f92ad9b9507dde" }, { "dataPath": "params_shard_518.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.61.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "14ab5e32cf12aa09f4a0287858d44260" }, { "dataPath": "params_shard_519.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "550dfdf57c271ddcfab6162aa6f7d971" }, { "dataPath": "params_shard_520.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.7.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9461760 }, { "name": "model.layers.60.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20471808 } ], "md5sum": "e8dbd9216fba8a1a81ea35c25b6d6c6f" }, { "dataPath": "params_shard_521.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.61.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "8eb5a19878ad4c030f434c6037db49ed" }, { "dataPath": "params_shard_522.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "ee5124a168fa145ca7b6dd73fbf5b6d5" }, { "dataPath": "params_shard_523.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "a2afe21446b0f14eb27f0e41fca2be5d" }, { "dataPath": "params_shard_524.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.61.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "671fd9b5f6b9e926f6a12f12ab6837db" }, { "dataPath": "params_shard_525.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.61.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "5688612df5305de8f432c2af323e9bf2" }, { "dataPath": "params_shard_526.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "4bc158c6ec280f38a5d728f0197150b9" }, { "dataPath": "params_shard_527.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "429cc2a64ace72a4668f325f49f9d9b1" }, { "dataPath": "params_shard_528.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.62.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "7c65bdf9da2c1e3bc669229b2a34052c" }, { "dataPath": "params_shard_529.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.62.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "0a618f4b843e6e28332a11206e0af172" }, { "dataPath": "params_shard_530.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.62.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "73ec3424a3582840698c271fdb63954c" }, { "dataPath": "params_shard_531.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.62.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "cf65dc64ea7a01bb44f82e5eb1ec9a1b" }, { "dataPath": "params_shard_532.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "ce504560e8d5199b01f88c80aaccf730" }, { "dataPath": "params_shard_533.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.62.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "12d63bc702d49f106f94bd2e584a4447" }, { "dataPath": "params_shard_534.bin", "format": "raw-shard", "nbytes": 31506432, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.61.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20447232 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 20471808 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 20496384 } ], "md5sum": "048b671ae952d503393d37873e317680" }, { "dataPath": "params_shard_535.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "303dc851631cda20cffb7d15a7d52740" }, { "dataPath": "params_shard_536.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "e06298408845ad4417f56d6f2df7c16e" }, { "dataPath": "params_shard_537.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.63.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "fa910ba5e9a1ac294ee34f26bf28bdf1" }, { "dataPath": "params_shard_538.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.63.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "2bb20544d2556eaee9208a7f50c5ae6e" }, { "dataPath": "params_shard_539.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.63.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "3da579463edd4e89a78a3a3ef87ae1e5" }, { "dataPath": "params_shard_540.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.63.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "ad2910312a529397aab746d073a10d25" }, { "dataPath": "params_shard_541.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "e3efac10169a5b6bc555879c397d4d48" }, { "dataPath": "params_shard_542.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.63.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f7ad74b98365ba9f0c955da44f7c5cef" }, { "dataPath": "params_shard_543.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "9de20d00727d5a3bab44ee20b22d4815" }, { "dataPath": "params_shard_544.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "9470e2c4fab5eb2e045f1d240354a916" }, { "dataPath": "params_shard_545.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.7.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "37054acc6abe8b0fe9f1b91eb4585a40" }, { "dataPath": "params_shard_546.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.7.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "ab0dc3a9528eae81e5d6eee7ce92430b" }, { "dataPath": "params_shard_547.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 12288, 4224 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "cecff68a6a804beac5a2dbc911853cce" }, { "dataPath": "params_shard_548.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 12288, 1056 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "761a37c179492819c88dec4818d8b214" }, { "dataPath": "params_shard_549.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.8.mlp.gate_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "cea415b20c751595da2bf6a7aa25720e" }, { "dataPath": "params_shard_550.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.8.mlp.gate_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "130be3fee135ed7112c7e990bb50e982" }, { "dataPath": "params_shard_551.bin", "format": "raw-shard", "nbytes": 207618048, "records": [ { "name": "model.layers.8.mlp.up_proj.q_weight", "shape": [ 33792, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 207618048, "byteOffset": 0 } ], "md5sum": "0c5a4ed789995113659a30ec85f22090" }, { "dataPath": "params_shard_552.bin", "format": "raw-shard", "nbytes": 25952256, "records": [ { "name": "model.layers.8.mlp.up_proj.q_scale", "shape": [ 33792, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25952256, "byteOffset": 0 } ], "md5sum": "7eaf0ccffa4afbc17285f9104aa01731" }, { "dataPath": "params_shard_553.bin", "format": "raw-shard", "nbytes": 88080384, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 14336, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 88080384, "byteOffset": 0 } ], "md5sum": "25aa631285fb8c1e422e2906e0019918" }, { "dataPath": "params_shard_554.bin", "format": "raw-shard", "nbytes": 29982720, "records": [ { "name": "model.layers.62.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 9437184 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 9461760 }, { "name": "model.layers.63.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 20471808 }, { "name": "model.norm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29908992 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29933568 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 24576, "byteOffset": 29958144 } ], "md5sum": "5b93b8a0ad4b7fcff64e2283d0a9aba0" }, { "dataPath": "params_shard_555.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.8.self_attn.out_proj.q_weight", "shape": [ 12288, 1536 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7a6f8d6a32ccde6586fbb5f354b87040" }, { "dataPath": "params_shard_556.bin", "format": "raw-shard", "nbytes": 20447232, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 14336, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11010048, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.out_proj.q_scale", "shape": [ 12288, 384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11010048 } ], "md5sum": "d696d3a1cde821e670e3548a0e2b7c4d" } ] }