{ "metadata": { "ParamSize": 183, "ParamBytes": 1409830912.0, "BitsPerParam": 4.500347711112945 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 262144000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 256000, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262144000, "byteOffset": 0 } ], "md5sum": "0df787bcdc08d4792eab2601f7a02391" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "38809e07c773a53538e8ee568783222b" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 32772096, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 256000, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768000, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 32768000 } ], "md5sum": "7a79b1fec36e129a32980ffa183127f7" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "04ce68e8314d8d88dcd689572cf162f6" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f7d37391e98f9f1a12fc21c33c36867e" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 2097152 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 6291456 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 6295552 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 8916992 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 9244672 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 11341824 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11603968 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 11608064 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 28385280 } ], "md5sum": "ca7530fc8c466ebfdc8c2d8d16592521" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3c37eafe7dbd13930e461b7e86b376cc" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 32583680, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 4194304 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 4198400 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 6819840 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 7147520 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 9244672 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 9506816 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 9510912 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26288128 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 28385280 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 32579584 } ], "md5sum": "7ed3f8e1053fa15d84066d713de098c8" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "01eeda0a35c852b9c862da66136f0b4a" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 33431552, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 2621440 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 2949120 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 5046272 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 5308416 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 5312512 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 22089728 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24186880 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28381184 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 28385280 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 31006720 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 31334400 } ], "md5sum": "1d8bd7b542a6db13ccfb1feeb8cb98ea" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4d5d73bee96dc9441047fe05fe2ff51f" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "92d1a0261da0f6a1ab9c0daacaeddcb0" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "898c0f0b09a4e65252e8670a39e1274d" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 30748672, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 0 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 262144 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 266240 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 17043456 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19140608 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23334912 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23339008 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 25960448 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26288128 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28385280 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28647424 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 28651520 } ], "md5sum": "6a4c47f2592e6a2a24a9e2d52e380fb8" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9b68b917de243b7c8f3a8763f7d902da" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 32583680, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 4194304 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 4198400 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 6819840 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 7147520 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 9244672 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 9506816 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 9510912 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26288128 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 28385280 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 32579584 } ], "md5sum": "7da702873448e3d4d1eff02df94aafcc" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f7933e33915c46f863971774bca2037d" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 33431552, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 2621440 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 2949120 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 5046272 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 5308416 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 5312512 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 22089728 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24186880 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28381184 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 28385280 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 31006720 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 31334400 } ], "md5sum": "e8f50082023b18a9f1f63c063000b573" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c795b4e1194cee8a3d9c46fe73bdd319" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "511c008568e45ce28ab1fd179c7d7237" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 32841728, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 262144 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 266240 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 17043456 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19140608 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23334912 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23339008 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 25960448 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26288128 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28385280 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 28647424 } ], "md5sum": "9f6948c9a0b751e6280e8b782645e4c1" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d7d360dd3a0f0de4c4b912a293d33d67" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33431552, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 2621440 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 2949120 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 5046272 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 5308416 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 5312512 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 22089728 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24186880 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28381184 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 28385280 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 31006720 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 31334400 } ], "md5sum": "e9b8c270cc5280ec29e7f4f908f42854" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0a55ffb6a71f8ba3e226007c01356e03" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "0a8c18ce73c7d08800dc0883bbe008b6" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8f3a97a760189f133ad4a75b1ab08bf7" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 30748672, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 262144 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 266240 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 17043456 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19140608 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23334912 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23339008 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 25960448 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26288128 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28385280 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28647424 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 28651520 } ], "md5sum": "dbde9eb9c7031f27bd2e8ba775a33f63" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "003deb4690a82c86a57f465c82c6d9ab" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 32583680, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 4194304 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 4198400 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 6819840 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 7147520 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 9244672 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 9506816 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 9510912 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26288128 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 28385280 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 32579584 } ], "md5sum": "9a40d939b4c0579059c769b2577e196b" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0fdea51ca301d13b6062e5f2c8f38f4b" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33431552, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 2621440 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 2949120 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 5046272 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 5308416 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 5312512 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 22089728 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24186880 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28381184 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 28385280 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 31006720 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 31334400 } ], "md5sum": "4b04b00c7d8ff893f43135a86e17b109" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a7e869b9444ee84689f4d425550aa63c" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "c5061384861a44c5f7f9c31c0e2e2c9b" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "af8ee3cecdb42a2acd6f4303032247fb" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 30748672, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 0 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 262144 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 266240 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 17043456 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19140608 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 23334912 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 23339008 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 25960448 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26288128 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 28385280 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 28647424 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 28651520 } ], "md5sum": "2e0f787f38f03557001add93643a268d" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 32768, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "916844a47465c18ac666379ab52812bd" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 32583680, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 4194304 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 4198400 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 6819840 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 7147520 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 9244672 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 9506816 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 9510912 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 26288128 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 32768, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 28385280 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 32579584 } ], "md5sum": "6ed8107bf7ef39faa81ba825f0468912" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 24195072, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 2560, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2621440, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 2560, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 327680, "byteOffset": 2621440 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 2949120 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 5046272 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 5308416 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 2048, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 5312512 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 2048, 512 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 22089728 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 24186880 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 24190976 } ], "md5sum": "7d6ba444fa89b2e8c6b7a07c07241947" } ] }