{ "metadata": { "ParamSize": 269, "ParamBytes": 387649536.0, "BitsPerParam": 5.005399562680047 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 77791232, "records": [ { "name": "lm_head.q_weight", "shape": [ 151936, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 77791232, "byteOffset": 0 } ], "md5sum": "47249f991afc8ef584e8c2435ca90870" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 77791232, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 151936, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 77791232, "byteOffset": 0 } ], "md5sum": "47249f991afc8ef584e8c2435ca90870" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 33329152, "records": [ { "name": "lm_head.q_scale", "shape": [ 151936, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9723904, "byteOffset": 0 }, { "name": "model.embed_tokens.q_scale", "shape": [ 151936, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9723904, "byteOffset": 9723904 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 19447808 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 19449856 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 20891648 }, { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 21071872 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 23955456 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 24315904 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 24317952 }, { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 24324096 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 25896960 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 26093568 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 26617856 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 26683392 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 26685440 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 28127232 }, { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 28307456 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 31191040 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 31551488 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 31553536 }, { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 31559680 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 33132544 } ], "md5sum": "5bb7ad6255b35bf3e54d021b245bc9f0" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 31156224, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 524288 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 589824 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 591872 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 2033664 }, { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 2213888 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 5097472 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 5457920 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 5459968 }, { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 5466112 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 7038976 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 7235584 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 7759872 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 7825408 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 7827456 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 9269248 }, { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 9449472 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 12333056 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 12693504 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 12695552 }, { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12701696 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 14274560 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 14471168 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 14995456 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 15060992 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 15063040 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 16504832 }, { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 16685056 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 19568640 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 19929088 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 19931136 }, { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19937280 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 21510144 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 21706752 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 22231040 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 22296576 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 22298624 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 23740416 }, { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 23920640 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 26804224 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 27164672 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 27166720 }, { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 27172864 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 28745728 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 28942336 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 29466624 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 29532160 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 29534208 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 30976000 } ], "md5sum": "991e0aecad9c88a5cc1ca0be85238687" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 32194560, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 2883584 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 3244032 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 3246080 }, { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 3252224 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 4825088 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 5021696 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 5545984 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 5611520 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 5613568 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 7055360 }, { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 7235584 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 10119168 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 10479616 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 10481664 }, { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 10487808 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 12060672 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 12257280 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 12781568 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 12847104 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 12849152 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 14290944 }, { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 14471168 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 17354752 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 17715200 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17717248 }, { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 17723392 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 19296256 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 19492864 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 20017152 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 20082688 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 20084736 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 21526528 }, { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 21706752 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 24590336 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 24950784 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 24952832 }, { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 24958976 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 26531840 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 26728448 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 27252736 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 27318272 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 27320320 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 28762112 }, { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 28942336 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 31825920 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 32186368 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 32188416 } ], "md5sum": "cd0fce2641f8d5513fded0d5552fabe9" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 32925696, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 1572864 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 1769472 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 2293760 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 2359296 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 2361344 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 3803136 }, { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 3983360 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 6866944 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 7227392 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 7229440 }, { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 7235584 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 8808448 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 9005056 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 9529344 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 9594880 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 9596928 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 11038720 }, { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 11218944 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 14102528 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 14462976 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 14465024 }, { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 14471168 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 16044032 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 16240640 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 16764928 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 16830464 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 16832512 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 18274304 }, { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 18454528 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 21338112 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 21698560 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 21700608 }, { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 21706752 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 23279616 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 23476224 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 24000512 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 24066048 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 24068096 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 25509888 }, { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 25690112 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28573696 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 28934144 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 28936192 }, { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28942336 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 30515200 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 30711808 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 31236096 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 31301632 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 31303680 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 32745472 } ], "md5sum": "4bebe31dccbfe72d305fc95478773fc2" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 32194560, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 0 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 2883584 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 3244032 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 3246080 }, { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 3252224 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 4825088 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 5021696 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 5545984 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 5611520 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 5613568 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 7055360 }, { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 7235584 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 10119168 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 10479616 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 10481664 }, { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 10487808 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 12060672 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 12257280 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 12781568 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 12847104 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 12849152 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 14290944 }, { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 14471168 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 17354752 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 17715200 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 17717248 }, { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 17723392 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 19296256 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 19492864 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 20017152 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 20082688 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 20084736 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 21526528 }, { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 21706752 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 24590336 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 24950784 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 24952832 }, { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 24958976 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 26531840 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 26728448 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 27252736 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 27318272 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 27320320 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 28762112 }, { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 28942336 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 31825920 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 32186368 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 32188416 } ], "md5sum": "bd3887400fca31b3e10531bdc8589492" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 31303680, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 1572864 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 1769472 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 2293760 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 2359296 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 2361344 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 3803136 }, { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 3983360 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 6866944 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 7227392 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 7229440 }, { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 7235584 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 8808448 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 9005056 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 9529344 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 9594880 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 9596928 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 11038720 }, { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 11218944 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 14102528 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 14462976 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 14465024 }, { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 14471168 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 16044032 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 16240640 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 16764928 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 16830464 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 16832512 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 18274304 }, { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 18454528 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 21338112 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 21698560 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 21700608 }, { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 21706752 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 23279616 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 23476224 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 24000512 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 24066048 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 1024, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1441792, "byteOffset": 24068096 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 1024, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 180224, "byteOffset": 25509888 }, { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 5632, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 25690112 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 5632, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28573696 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 28934144 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 3072 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6144, "byteOffset": 28936192 }, { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 3072, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28942336 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 3072, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 196608, "byteOffset": 30515200 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 1024, 128 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 524288, "byteOffset": 30711808 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 1024, 32 ], "dtype": "bfloat16", "format": "raw", "nbytes": 65536, "byteOffset": 31236096 }, { "name": "model.norm.weight", "shape": [ 1024 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2048, "byteOffset": 31301632 } ], "md5sum": "087583ac4f7465ef497959dbc5802c8b" } ] }