{ "metadata": { "ParamSize": 170, "ParamBytes": 6425499648.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 788004864, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 128256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 788004864, "byteOffset": 0 } ], "md5sum": "ad5b241e75194e932113ce287edfd3e5" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "49b4d4c409da6bdd739e9645457fbc28" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "08f668bf0839e1a1a1e49e8b4d31d8a0" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "6a833ac91147731d0dc694797387c655" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "223470f4bf8228bb0407825886d53c4e" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c3c4fc75c26d9e1c28aa943caaffae43" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "ed6d793fc2c6b84e239a989ccf0f0c63" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0206bc3ab17cdfdc7ce7c1f5eec31f11" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "e54a86424568408412013b4061c52457" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2e562be0f2ef06acca5d5dc2d4d69083" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "345e4cf1097065e91b9917fc79267ad9" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "81bfc646c8625007af2981512c5367a8" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a2b2fbdcc692dea5cabb310d9c6169ef" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2fb0350bef3892ee6cf7d17d53d512ee" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "287c107bf9ed9301cc71d715204f9be3" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "91507ab82a0d6dc77d41924a1d9b8863" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "22e107f1422969c7ae300ec3429a19ab" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5d1098cdba5e4706a47b7d5dd9a73520" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "2cc80b6c74f4edc400f1c5d56ac19925" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2a21ff1977220b6c5f63047b288e54e3" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "e2713d579e29395c7252668f13274808" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7ca0318836b0573b059dc4091244de0f" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "6a6ab8255ffa0f2d997070e1ace2a330" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "755ef4e485bf7e00cf5f19fa9171fc13" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "dc2822cbc41bd102994668e4062c1490" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "119960688bc49d4410464d16f8d68b91" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "ac6b7487ae103887d2e6cadb25a77bea" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "cd5ff980cedce31715cb76142ca422f6" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "091631025e4b9ba90a61bf12172a16e6" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5f218d808438c1c5dc4f63f427657f51" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "8ef192f6ac93f5bb799c0ef37d0e30e3" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d9168ee13fa059a41231ac8034456e6c" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "c3c5313db66e08011ce600c8808b355c" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a4f75c51e17fca235f346fc742144dbc" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "e3615902377476ed86467521108670ad" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "bb81537fd44aa831afb3bfb5a1cddd17" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9f0cc61b2b61536d436c80fc044cb49a" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "96e577feaa61e6984b5868a7a70fdcdb" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "fb31470bb7bb699ca815f6d487dc3c05" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b367cddcbe44c1864f11fc9ed83f96f3" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "e7cc7b53951b8844fa3b05b6a507f69a" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "acb2012e225c8d875d2e6204506d4eac" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "13283901e7a78ea5ca61433bc2641b25" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f3bc36db1a0a25ddb7103a428fd40e18" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "05a117aee8bd862976dced128cc6eb6b" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "934e2cbd292621642feba80ac33786b8" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "7eb4c5b3729ff22fe1162b8ef46d3ebf" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d047246a68038f9d8d65ef4608c0640c" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "52a7d358264e947cf3eebf73788cde0b" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1628743f1a42b9d5ec3fbda78089ffb8" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "b3cd61d4bbdfcfa7f29c195f6ba2c473" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "bc97d679bff31438e1a8890070ba5ac5" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ee225468a68e72348f7776fa30b0c3b9" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "1d547401ec8fa9e8e2dd3bbcb8bb6ca1" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ad3cea57ffeab5dee2de4c3865b21f1e" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "6b44b2c4c18b97b68361b3a22ec98396" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5c78352a13928e4adb7aa99937ecdcc6" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "d40b3feabdaaaacec05f41e53b6ad5f7" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8e1d27f24ffa0d7f834395567a0626cf" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f5afb2097169c2c4a37777d8e78552c8" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4f83702709846ffc41b1d69452986319" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "05e7664938e1a08cadecf5ba44d848c0" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5be6306bbe6b73e506de669f47c8b76c" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a7be6ffe7e28f4b42cca0a640998d6b2" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3dddd3e32fc2a304c519753d9184e529" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "461b504aaf3741f5ed568e8928094a6e" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "26f9ffe723de0ede7825ff6b544c075a" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "4982aa8b1b1432d7dd8af059da328627" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7c90e16544155e069b6cdb5208c209c7" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "d89651c2e38049b1a552c13627370e23" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ec306966c7e89f7895778ce8b14f4707" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "299c3c2258981caf33ed1d31cf8c78e2" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "35d232bb60cbd45ca7a17a544c4dbf06" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "4b3b50a160e7257b70ce5e2bbe6e3d6f" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "20cdc2f04631e2a438cb6120031e62d9" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5bf4e3aa941df2c0e7d3146a29510016" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "293da23d33827869b58ac7f3ccd8d607" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "b6a83e51dd363ec4fa410f01f6f406a3" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "89043a4b43f7d9b52077c9bb7ebe1ced" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "fab351236a82a4ed4eaaef64c89fd857" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d997be401f1af829feb81876433f2e85" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "6f56b9db892f385964dde0e37dee8e07" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "6c853563e08a309c46b357b69d0374b4" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "edbfe726658fdea9d7ab1a2c72b9e127" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8d409e8d7a30fbbaa4233b822ed0d300" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "29ec9f881a83f51b7bdbb0b92a6226f2" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "f80502b938f393358ef4350408bf094e" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f7e8db850638377b19c57093b00361ee" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ce7ade97d9cb08c0411da69e046a3a7f" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "59df560616188f1a16f02cd7e1497a71" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "a4c7292ff72360c29e61c567c1443873" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "c7029d83c52934cb8416529d37af541e" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5a46e5db5af3fe6c4cacf434f13977d6" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "824aa7d0fae4084ed9e4ba934127ef0b" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "fcb306034f5325aaa26ac48c62fd16f9" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "dc8109965ff7b0807bcd5ae3f48641c1" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "4406ea776d974f4423f84496deb69a49" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4adc35d6a727d2c2370ee6a26e369220" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "8034e48ccafebfc3a7e0f7589f99222e" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0c3a596243236dd711ba13d5881234c8" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "36311bb7741c4ed38012a186f9e60eb0" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2c3ecd69de27229a3463f4055664ebd9" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "e254ded35664683fb1b5cf12865cfce4" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0c679f6afc92093e60d6a57a9c3171fe" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7770425eaa5ed60ca6aa257d46192768" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a3aae980f4e3945fa782a6e16f851048" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "3273dfc4cead928697b5a50cb59829d4" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e03dcfad47c8923ee39be6b09fad64a2" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a3c3a8a27fb7332cdd2b2af37deb6e1c" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "bb9213dcffee9123b49a1172b2711600" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 0 } ], "md5sum": "46c404a22b2c2db4bc1ace1b77e9aa57" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "462fd0e9b2b626ee2144acfca5671c8c" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 31807488, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 6144 }, { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 5120, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 31457280, "byteOffset": 12288 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31469568 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31475712 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31481856 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31488000 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31494144 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31500288 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31506432 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31512576 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31518720 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31524864 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31531008 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31537152 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31543296 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31549440 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31555584 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31561728 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31567872 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31574016 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31580160 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31586304 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31592448 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31598592 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31604736 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31610880 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31617024 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31623168 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31629312 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31635456 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31641600 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31647744 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31653888 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31660032 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31666176 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31672320 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31678464 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31684608 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31690752 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31696896 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31703040 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31709184 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31715328 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31721472 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31727616 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31733760 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31739904 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31746048 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31752192 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31758336 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31764480 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31770624 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31776768 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31782912 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31789056 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31795200 }, { "name": "model.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31801344 } ], "md5sum": "8ee51d640f3008dcccc13eac7d6b36b0" } ] }