diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
index 03c4ef3e4939d9f03b765508a086284e4791a802..b1e9648786b160a2dca295b27fc45aeaafc9ba78 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,31 @@
---
-license: llama3
+language:
+ - en
+ - de
+ - fr
+ - it
+ - pt
+ - hi
+ - es
+ - th
+license: llama3.2
+base_model: NousResearch/Hermes-3-Llama-3.2-3B
+base_model_relation: quantized
+library_name: mlc-llm
+pipeline_tag: text-generation
+tags:
+- Llama-3
+- instruct
+- finetune
+- chatml
+- gpt4
+- synthetic data
+- distillation
+- function calling
+- json mode
+- axolotl
+- roleplaying
+- chat
---
+
+4-bit [OmniQuant](https://arxiv.org/abs/2308.13137) quantized version of [Hermes-3-Llama-3.2-3B](https://huggingface.co/NousResearch/Hermes-3-Llama-3.2-3B) for inference with [Private LLM](http://privatellm.app).
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1997fb2e09625d2187f767b9f1643dda7fa236f3
--- /dev/null
+++ b/config.json
@@ -0,0 +1,6 @@
+{
+ "quantization_config": {
+ "bits": 4
+ }
+}
+
diff --git a/ndarray-cache.json b/ndarray-cache.json
new file mode 100644
index 0000000000000000000000000000000000000000..666fb5ba1262d27b2a83b6f592a46419ca0704eb
--- /dev/null
+++ b/ndarray-cache.json
@@ -0,0 +1,3521 @@
+{
+ "metadata": {
+ "ParamSize": 283,
+ "ParamBytes": 1656834048.0,
+ "BitsPerParam": 4.069857841273854
+ },
+ "records": [
+ {
+ "dataPath": "params_shard_0.bin",
+ "format": "raw-shard",
+ "nbytes": 197001216,
+ "records": [
+ {
+ "name": "model.embed_tokens.q_weight",
+ "shape": [
+ 128256,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 197001216,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "afed46c2626032adaa3a00e22797b92d"
+ },
+ {
+ "dataPath": "params_shard_1.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "10c45988a8aa86a7a036bae52c98a8b8"
+ },
+ {
+ "dataPath": "params_shard_2.bin",
+ "format": "raw-shard",
+ "nbytes": 32913408,
+ "records": [
+ {
+ "name": "model.embed_tokens.q_scale",
+ "shape": [
+ 128256,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6156288,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.0.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 6156288
+ },
+ {
+ "name": "model.layers.0.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 6162432
+ },
+ {
+ "name": "model.layers.0.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 18745344
+ },
+ {
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 19138560
+ },
+ {
+ "name": "model.layers.0.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 19924992
+ },
+ {
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 19931136
+ },
+ {
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 27795456
+ },
+ {
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 28041216
+ },
+ {
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 32759808
+ },
+ {
+ "name": "model.layers.1.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 32907264
+ }
+ ],
+ "md5sum": "04a1dae5481c20346e8b8852527862ba"
+ },
+ {
+ "dataPath": "params_shard_3.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "0ef724e70788f44939bf0e73e96d7a90"
+ },
+ {
+ "dataPath": "params_shard_4.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.1.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.1.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.1.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.10.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "edc8c85302ddcc0715c3ccf2e529901e"
+ },
+ {
+ "dataPath": "params_shard_5.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.10.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "ffc20dc34865aa146d73cdfbdaeafdbc"
+ },
+ {
+ "dataPath": "params_shard_6.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.10.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.10.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.10.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.10.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.10.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.10.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.10.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.10.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.11.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "0be50caf0e03ca98071f1485c5f445f3"
+ },
+ {
+ "dataPath": "params_shard_7.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.11.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "e38ab2c47d1ece68bf58c897ef052082"
+ },
+ {
+ "dataPath": "params_shard_8.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.11.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.11.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.11.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.11.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.11.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.11.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.11.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.11.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.12.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "de11c331209ac1da630fdd6b6ca41cc8"
+ },
+ {
+ "dataPath": "params_shard_9.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.12.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "c83d31bdc8a32c7cd685f7780884a0a0"
+ },
+ {
+ "dataPath": "params_shard_10.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.12.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.12.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.12.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.12.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.12.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.12.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.12.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.12.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.13.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "e84bd5b63a224d068ff9cf2c31042bfe"
+ },
+ {
+ "dataPath": "params_shard_11.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.13.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "ce8b583d04d00eda044d90b3a3ae4157"
+ },
+ {
+ "dataPath": "params_shard_12.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.13.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.13.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.13.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.13.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.13.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.13.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.13.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.13.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.14.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "475c4991b84ba39097fa26d5f7e8dd8f"
+ },
+ {
+ "dataPath": "params_shard_13.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.14.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "51ed30cbc2bc3665cf7d2777323168f0"
+ },
+ {
+ "dataPath": "params_shard_14.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.14.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.14.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.14.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.14.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.14.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.14.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.14.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.14.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.15.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "5d1577c3281078ee2401f3284940aeab"
+ },
+ {
+ "dataPath": "params_shard_15.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.15.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "4da070ae30d04ff2cc6efeeea4f2a540"
+ },
+ {
+ "dataPath": "params_shard_16.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.15.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.15.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.15.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.15.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.15.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.15.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.15.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.15.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.16.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "bfbbfc67e15a76b14deb26f67255dca3"
+ },
+ {
+ "dataPath": "params_shard_17.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.16.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "27471e3c4f1d553eae646f2c65a7f223"
+ },
+ {
+ "dataPath": "params_shard_18.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.16.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.16.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.16.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.16.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.16.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.16.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.16.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.16.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.17.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "e7a63cefef49b1ad6c9db0ae7dfd7e66"
+ },
+ {
+ "dataPath": "params_shard_19.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.17.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "ed993fbe46efa519c0911b0053adac8c"
+ },
+ {
+ "dataPath": "params_shard_20.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.17.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.17.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.17.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.17.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.17.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.17.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.17.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.17.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.18.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "bfb160ecfc8f59a8d2f5ccb546d5897d"
+ },
+ {
+ "dataPath": "params_shard_21.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.18.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "922b38d3c9ce559240fc9d4d59a3411e"
+ },
+ {
+ "dataPath": "params_shard_22.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.18.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.18.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.18.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.18.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.18.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.18.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.18.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.18.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.19.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "37b2d7524cb5eb0ac1d9238cf718b561"
+ },
+ {
+ "dataPath": "params_shard_23.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.19.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "35cb949bdf1c9a85fcde41294fd9f8c2"
+ },
+ {
+ "dataPath": "params_shard_24.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.19.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.19.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.19.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.19.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.19.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.19.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.19.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.19.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.2.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "f52390f3999d4ee4f3b60c59de1544ac"
+ },
+ {
+ "dataPath": "params_shard_25.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "79c4ba798cfac5a5ecdb8f6395ae77a6"
+ },
+ {
+ "dataPath": "params_shard_26.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.20.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "5a8509548e2757e40347a7065e865f38"
+ },
+ {
+ "dataPath": "params_shard_27.bin",
+ "format": "raw-shard",
+ "nbytes": 27531264,
+ "records": [
+ {
+ "name": "model.layers.2.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.2.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.2.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.20.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "d4e46251ed1306ff9b1864dee5510626"
+ },
+ {
+ "dataPath": "params_shard_28.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "7fafaf6c3605a3ac41525c2e663b47ac"
+ },
+ {
+ "dataPath": "params_shard_29.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.20.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.20.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 7864320
+ },
+ {
+ "name": "model.layers.20.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 8110080
+ },
+ {
+ "name": "model.layers.20.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 12828672
+ },
+ {
+ "name": "model.layers.3.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.3.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 12982272
+ },
+ {
+ "name": "model.layers.3.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 25565184
+ },
+ {
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 25958400
+ },
+ {
+ "name": "model.layers.3.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "5e76d2fb99aa337aed52debc83d55a0c"
+ },
+ {
+ "dataPath": "params_shard_30.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.4.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "0ca27b977c9e90f176b0f5951e9198ac"
+ },
+ {
+ "dataPath": "params_shard_31.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 7864320
+ },
+ {
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 8110080
+ },
+ {
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 12828672
+ },
+ {
+ "name": "model.layers.4.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.4.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 12982272
+ },
+ {
+ "name": "model.layers.4.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 25565184
+ },
+ {
+ "name": "model.layers.4.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 25958400
+ },
+ {
+ "name": "model.layers.4.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "d5c8332e05928c74f3654e6229b4eb1e"
+ },
+ {
+ "dataPath": "params_shard_32.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.5.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "69b0e2c66f0be00e4c2b513348ccc58d"
+ },
+ {
+ "dataPath": "params_shard_33.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.4.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.4.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 7864320
+ },
+ {
+ "name": "model.layers.4.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 8110080
+ },
+ {
+ "name": "model.layers.4.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 12828672
+ },
+ {
+ "name": "model.layers.5.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.5.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 12982272
+ },
+ {
+ "name": "model.layers.5.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 25565184
+ },
+ {
+ "name": "model.layers.5.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 25958400
+ },
+ {
+ "name": "model.layers.5.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "d912af8e1795dd5e1aa69697fef3f87f"
+ },
+ {
+ "dataPath": "params_shard_34.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.6.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "de6b938ecf7a59bdba88578a141a43ab"
+ },
+ {
+ "dataPath": "params_shard_35.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.5.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.5.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 7864320
+ },
+ {
+ "name": "model.layers.5.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 8110080
+ },
+ {
+ "name": "model.layers.5.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 12828672
+ },
+ {
+ "name": "model.layers.6.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.6.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 12982272
+ },
+ {
+ "name": "model.layers.6.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 25565184
+ },
+ {
+ "name": "model.layers.6.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 25958400
+ },
+ {
+ "name": "model.layers.6.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "f8b63c2bb4c3753f760931b6894be087"
+ },
+ {
+ "dataPath": "params_shard_36.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.7.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "1220d854fc6dfdb610c222a9832928a7"
+ },
+ {
+ "dataPath": "params_shard_37.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.6.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.6.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 7864320
+ },
+ {
+ "name": "model.layers.6.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 8110080
+ },
+ {
+ "name": "model.layers.6.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 12828672
+ },
+ {
+ "name": "model.layers.7.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.7.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 12982272
+ },
+ {
+ "name": "model.layers.7.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 25565184
+ },
+ {
+ "name": "model.layers.7.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 25958400
+ },
+ {
+ "name": "model.layers.7.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "7e60562f90b4c954bfbc1eda5c3c6164"
+ },
+ {
+ "dataPath": "params_shard_38.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.8.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "d1b3ccbdc1e2b827f67f370aae514efa"
+ },
+ {
+ "dataPath": "params_shard_39.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.7.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.7.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 7864320
+ },
+ {
+ "name": "model.layers.7.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 8110080
+ },
+ {
+ "name": "model.layers.7.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 12828672
+ },
+ {
+ "name": "model.layers.8.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.8.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 12982272
+ },
+ {
+ "name": "model.layers.8.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 25565184
+ },
+ {
+ "name": "model.layers.8.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 25958400
+ },
+ {
+ "name": "model.layers.8.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "6bc668813dfa99b089384564982008a3"
+ },
+ {
+ "dataPath": "params_shard_40.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.9.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "e4ede658990bba83e16121f59ea75385"
+ },
+ {
+ "dataPath": "params_shard_41.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.8.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.8.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 7864320
+ },
+ {
+ "name": "model.layers.8.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 8110080
+ },
+ {
+ "name": "model.layers.8.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 12828672
+ },
+ {
+ "name": "model.layers.9.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.9.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 12982272
+ },
+ {
+ "name": "model.layers.9.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 25565184
+ },
+ {
+ "name": "model.layers.9.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 25958400
+ },
+ {
+ "name": "model.layers.9.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "a30c76affb75d2beea28dd974eac7d42"
+ },
+ {
+ "dataPath": "params_shard_42.bin",
+ "format": "raw-shard",
+ "nbytes": 25970688,
+ "records": [
+ {
+ "name": "model.layers.9.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.9.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 7864320
+ },
+ {
+ "name": "model.layers.9.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 8110080
+ },
+ {
+ "name": "model.layers.9.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 12828672
+ },
+ {
+ "name": "model.layers.20.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.20.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 12982272
+ },
+ {
+ "name": "model.layers.20.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 25565184
+ },
+ {
+ "name": "model.layers.20.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 25958400
+ },
+ {
+ "name": "model.layers.21.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 25964544
+ }
+ ],
+ "md5sum": "95497ed4987111a354f8441446760ded"
+ },
+ {
+ "dataPath": "params_shard_43.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.21.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "eef8d62c11f85246e6c16c3ad98ba015"
+ },
+ {
+ "dataPath": "params_shard_44.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.21.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.21.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.21.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.21.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.21.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.21.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.21.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.21.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.22.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "b07e782600d5c42b88115cc7cb356e5b"
+ },
+ {
+ "dataPath": "params_shard_45.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.22.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "34e23dd3d744583a2ed793dc93fae6b3"
+ },
+ {
+ "dataPath": "params_shard_46.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.22.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.22.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.22.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.22.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.22.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.22.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.22.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.22.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.23.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "e626b4823732ad6f2811a4e4c209eb07"
+ },
+ {
+ "dataPath": "params_shard_47.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.23.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "c685f9d558f4e66c73cd1752f0e36a81"
+ },
+ {
+ "dataPath": "params_shard_48.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.23.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.23.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.23.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.23.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.23.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.23.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.23.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.23.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.24.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "71947e86bdb91d4c402ee303cf2d139e"
+ },
+ {
+ "dataPath": "params_shard_49.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.24.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "f457319f2146339e214e22efe1bf9f96"
+ },
+ {
+ "dataPath": "params_shard_50.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.24.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.24.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.24.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.24.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.24.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.24.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.24.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.24.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.25.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "9cc5fe8bbb4e70033141d7b272dca809"
+ },
+ {
+ "dataPath": "params_shard_51.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.25.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "135eea4ec06a28d3350cc9df771e1af8"
+ },
+ {
+ "dataPath": "params_shard_52.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.25.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.25.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.25.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.25.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.25.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.25.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.25.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.25.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.26.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "afa274e391722047ffba2ac85d89c3c6"
+ },
+ {
+ "dataPath": "params_shard_53.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.26.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "ef971876b32ee79bb0fe98778349c336"
+ },
+ {
+ "dataPath": "params_shard_54.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.26.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.26.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.26.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.26.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.26.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.26.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.26.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.26.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.layers.27.input_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "62b6eabb979a0ef12082812b36a3a606"
+ },
+ {
+ "dataPath": "params_shard_55.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.27.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 16384,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 25165824,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "bdb58dcdaaa1f2d4c96299b699b25a06"
+ },
+ {
+ "dataPath": "params_shard_56.bin",
+ "format": "raw-shard",
+ "nbytes": 26750976,
+ "records": [
+ {
+ "name": "model.layers.27.mlp.down_proj.q_weight",
+ "shape": [
+ 3072,
+ 1024
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.27.mlp.down_proj.q_scale",
+ "shape": [
+ 3072,
+ 64
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 393216,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.27.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 16384,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 786432,
+ "byteOffset": 12976128
+ },
+ {
+ "name": "model.layers.27.post_attention_layernorm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 13762560
+ },
+ {
+ "name": "model.layers.27.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 5120,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 7864320,
+ "byteOffset": 13768704
+ },
+ {
+ "name": "model.layers.27.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 5120,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 245760,
+ "byteOffset": 21633024
+ },
+ {
+ "name": "model.layers.27.self_attn.o_proj.q_weight",
+ "shape": [
+ 3072,
+ 384
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 4718592,
+ "byteOffset": 21878784
+ },
+ {
+ "name": "model.layers.27.self_attn.o_proj.q_scale",
+ "shape": [
+ 3072,
+ 24
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 147456,
+ "byteOffset": 26597376
+ },
+ {
+ "name": "model.norm.weight",
+ "shape": [
+ 3072
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 6144,
+ "byteOffset": 26744832
+ }
+ ],
+ "md5sum": "586e4703a2e124eff5811c6e5c6c3258"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/params_shard_0.bin b/params_shard_0.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3628c4e4391d49a091d75ecc48a981728d99ccbf
--- /dev/null
+++ b/params_shard_0.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6ada003c253a7ac90860ba05d6a649c9db0f4986392b2b7e44eda7f146e6bac
+size 197001216
diff --git a/params_shard_1.bin b/params_shard_1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ceedd2073d57ef10b1c51075a536eff17903e225
--- /dev/null
+++ b/params_shard_1.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f3c39b05cb4024fc0de916ac7aa2f3b2640b1f73f4a5df10bdef8d1af00cf07
+size 25165824
diff --git a/params_shard_10.bin b/params_shard_10.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2eb0d9060c9b26b85410622ff2aeeb50057356d2
--- /dev/null
+++ b/params_shard_10.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9101f9124389879e5dc9a247c889b1aa0a4bd87be59508ef3f7eef387c867f3
+size 26750976
diff --git a/params_shard_11.bin b/params_shard_11.bin
new file mode 100644
index 0000000000000000000000000000000000000000..990f277df7740cdd415324fe27105032621abb91
--- /dev/null
+++ b/params_shard_11.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ae186cb72cb9bb478945bf7a84da9a85210ba1d8d0c8e904d9537ab67a2b34d
+size 25165824
diff --git a/params_shard_12.bin b/params_shard_12.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b3df23b866c95ad4329249dec6cff134ad55d279
--- /dev/null
+++ b/params_shard_12.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fb5a568fe9c4e13140ea27b39b4b11a8b4568b8a32abefb9ea0231280a55939
+size 26750976
diff --git a/params_shard_13.bin b/params_shard_13.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c9c85f4ef92d0bc397f380c1f961f6c554fe7709
--- /dev/null
+++ b/params_shard_13.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57eeb940061f501a456bd10036bf85244d1c4b762835337fae0954f1e0ab2e5e
+size 25165824
diff --git a/params_shard_14.bin b/params_shard_14.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4784d2ad76d786c610c7defd7bbe13ad39c926ac
--- /dev/null
+++ b/params_shard_14.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de7f73e33d0c9b6289467d91a5a11891ae444c90bbcc9fa27ab85edff4686664
+size 26750976
diff --git a/params_shard_15.bin b/params_shard_15.bin
new file mode 100644
index 0000000000000000000000000000000000000000..28e42573d294a7532ddec726e1b1752103c744af
--- /dev/null
+++ b/params_shard_15.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33c05d969d55f9b1492b25e651f15167a4cfc57b3e9b0e9f82952a53b8a0dc23
+size 25165824
diff --git a/params_shard_16.bin b/params_shard_16.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e240e3721482be9addefa1243fbb63236f390852
--- /dev/null
+++ b/params_shard_16.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b374e9f52c621b50cc7a6057605e4aa49032ac49f3d4e3b81673106dfe98824
+size 26750976
diff --git a/params_shard_17.bin b/params_shard_17.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e29368c625161ad77ea21c64b375c787aed81b99
--- /dev/null
+++ b/params_shard_17.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1ac75e7028c99a21c2fcc4bc83abe19661708e7cb431fed0632143a444029be
+size 25165824
diff --git a/params_shard_18.bin b/params_shard_18.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cad14a3063b137d27d18f229a43548920d84402f
--- /dev/null
+++ b/params_shard_18.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0201c6583447c49426893de42b8c54edf3e907a73f7554f43e1cbb160709b1a1
+size 26750976
diff --git a/params_shard_19.bin b/params_shard_19.bin
new file mode 100644
index 0000000000000000000000000000000000000000..70f95aec4b4b922afe34a968424147f7637fd144
--- /dev/null
+++ b/params_shard_19.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5d85491ccc44d198e137376428c04833c724d808747e4fd58f241af6f0664bf
+size 25165824
diff --git a/params_shard_2.bin b/params_shard_2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..401fb0a0fc028c01292182451597dd34ca9fa0d8
--- /dev/null
+++ b/params_shard_2.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb96a43ff29547eb1ac762a975683d6a231d0840fe3a6c00861afeb9b68a60c9
+size 32913408
diff --git a/params_shard_20.bin b/params_shard_20.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cf3343eca108fd7211f62ceda5eae7b0a72b3430
--- /dev/null
+++ b/params_shard_20.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69c1eec1524873c7285f83e2925e9069bb4b85258cce7319ce801dbfbc368910
+size 26750976
diff --git a/params_shard_21.bin b/params_shard_21.bin
new file mode 100644
index 0000000000000000000000000000000000000000..feda7d4acc260de982bae1ff8fe072f61a4dd42e
--- /dev/null
+++ b/params_shard_21.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e4d9763c691671625c44164479c2283d44a9f1e32eb0c7b6971bab5362ee6c4
+size 25165824
diff --git a/params_shard_22.bin b/params_shard_22.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ca57c163fceafb1c358892e41ae50e9524f56fe1
--- /dev/null
+++ b/params_shard_22.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46cbcf03db47fcf56e32fe33336325d4793dca0e4a15c30b31c85856baac242d
+size 26750976
diff --git a/params_shard_23.bin b/params_shard_23.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6273ce97af8c3ac87e7cdc234eb95e04028c8a7e
--- /dev/null
+++ b/params_shard_23.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c538658ed6151aa7175ca38b0556f25e22802c968ac7a3f3ad02dbf9f6a96427
+size 25165824
diff --git a/params_shard_24.bin b/params_shard_24.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6864c4ae50b9efdca8e5c2c616f8956e79f9da43
--- /dev/null
+++ b/params_shard_24.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3459ed36807914ffd9d66dbb9f626d330c3d7d51388fc42f2f9e714e7cf0447
+size 26750976
diff --git a/params_shard_25.bin b/params_shard_25.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f4d81e55aceb536feb2395a07bd0469c6a1de6bc
--- /dev/null
+++ b/params_shard_25.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c88f6c4c68daab3549cf15cdc4354169a375a45c2f625dbda4b19150b3d401e9
+size 25165824
diff --git a/params_shard_26.bin b/params_shard_26.bin
new file mode 100644
index 0000000000000000000000000000000000000000..07a5ce08d27b939be97b7bb53c75ccace5823081
--- /dev/null
+++ b/params_shard_26.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d1bcbbd3029be455336f8d0c07a99c9fe712ea32ea62da5d241864ddfc89adc
+size 25165824
diff --git a/params_shard_27.bin b/params_shard_27.bin
new file mode 100644
index 0000000000000000000000000000000000000000..320e80343648abe86740ecc17dc204d3f7362b62
--- /dev/null
+++ b/params_shard_27.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abf4d347c852c829aaf76a72daa431952784db17077ee9051ecbec02632f40d4
+size 27531264
diff --git a/params_shard_28.bin b/params_shard_28.bin
new file mode 100644
index 0000000000000000000000000000000000000000..54ac7951c116d6fe9435c62781a7d4d2e2722aec
--- /dev/null
+++ b/params_shard_28.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aea829c0828e35c29bb67f8ab268b27320da55ba7b9d9607fa55fcb4219ed944
+size 25165824
diff --git a/params_shard_29.bin b/params_shard_29.bin
new file mode 100644
index 0000000000000000000000000000000000000000..52cf17ea00b526ee1f81f42296345ee433406e8a
--- /dev/null
+++ b/params_shard_29.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:627f77e539e41bf6fa882e479e8b433ec697c98d337d7a08d347595c0f307224
+size 26750976
diff --git a/params_shard_3.bin b/params_shard_3.bin
new file mode 100644
index 0000000000000000000000000000000000000000..305ee34190890b1883571915171493bdc9b1f09b
--- /dev/null
+++ b/params_shard_3.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecb801f8beb75d93d232a8b905deba6a76dc2bbdc0a34356f81a0ebe51ffcdd2
+size 25165824
diff --git a/params_shard_30.bin b/params_shard_30.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ae1bb98a92e7a9daf93518469e06c6e900fad0f1
--- /dev/null
+++ b/params_shard_30.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1475c21382aedab3512bdc6bc56641de88f3998daef4f9d846c5ad3bf5c83e0
+size 25165824
diff --git a/params_shard_31.bin b/params_shard_31.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ded55d216028d5cd8abc0eedee4ecfeebb72a40b
--- /dev/null
+++ b/params_shard_31.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d6db8ac6c8debadcdfd53e49082551cf5bdbed96d0e76c48c57ab42b387c69b
+size 26750976
diff --git a/params_shard_32.bin b/params_shard_32.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f295554491c916fe580a4d6cabc255befaad3c9c
--- /dev/null
+++ b/params_shard_32.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1e894235ce8896129271049cbd08dbd1d80f3a91c5c6b28662d04557b1a149b
+size 25165824
diff --git a/params_shard_33.bin b/params_shard_33.bin
new file mode 100644
index 0000000000000000000000000000000000000000..aba8921b5133a9df8bcc3851a6a81e2196a2b380
--- /dev/null
+++ b/params_shard_33.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84958c0b467cebfb28837fd7261ae2ae0451f654ed664c83ea85a2e48605c93c
+size 26750976
diff --git a/params_shard_34.bin b/params_shard_34.bin
new file mode 100644
index 0000000000000000000000000000000000000000..72d869230185da3ba6587d5cffc138fd0fdc604f
--- /dev/null
+++ b/params_shard_34.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a8bb8033d3a1b92f06e3c96eb893c6338b3c12f5d1fda3414375181358f1f3d
+size 25165824
diff --git a/params_shard_35.bin b/params_shard_35.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ed6e08b0ef196f43775c9663370fa9f701a613cd
--- /dev/null
+++ b/params_shard_35.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7c54f5a93f24d917e9418db5aaad74e2a320796c07e11c022ea42489167e145
+size 26750976
diff --git a/params_shard_36.bin b/params_shard_36.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ece25ce8930bd91fbc332dc9eea6f4420aa59fae
--- /dev/null
+++ b/params_shard_36.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9917e7544741cc23ced61401004232a18c3028794d1a18aeb82438f84ea02db
+size 25165824
diff --git a/params_shard_37.bin b/params_shard_37.bin
new file mode 100644
index 0000000000000000000000000000000000000000..95f4d57d42f3bbc8757a4db4ab39f5ebb8453f41
--- /dev/null
+++ b/params_shard_37.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5348dc93c2401cff61faf0b887c6f964c5ad82a37c219b00fa68bb3bd5ee726
+size 26750976
diff --git a/params_shard_38.bin b/params_shard_38.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a96b90546a3c17ea04ce438e172815ff21cc295d
--- /dev/null
+++ b/params_shard_38.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2293bee2caa5ca6a771e3440ab3045d7ec0fb98d3b75660b1ddd9a8eeed3a724
+size 25165824
diff --git a/params_shard_39.bin b/params_shard_39.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3023abe1b6ec19c89e5c7ef3a4826fa4d5d0dee4
--- /dev/null
+++ b/params_shard_39.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80b42c34612b7ae64d855b7376fe3b15b07d4ca597ec33f7cb81deff5229ebee
+size 26750976
diff --git a/params_shard_4.bin b/params_shard_4.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c785c2ab0ced0708cb592c2195c3377e7e4d9314
--- /dev/null
+++ b/params_shard_4.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5c45d0127a09afe9fed54e0c77c8c625a4a80ed9287d0b0f44489dda9a61095
+size 26750976
diff --git a/params_shard_40.bin b/params_shard_40.bin
new file mode 100644
index 0000000000000000000000000000000000000000..25bfdaa77c4e6e0c14e575a9ee6b4a9b0e71313e
--- /dev/null
+++ b/params_shard_40.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7db212c3477912d8145ec917d2478c20167e057c90d00543e8baf89f7879f664
+size 25165824
diff --git a/params_shard_41.bin b/params_shard_41.bin
new file mode 100644
index 0000000000000000000000000000000000000000..773309666b357e5bca828af9cdc2eb5bc8dd0339
--- /dev/null
+++ b/params_shard_41.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53a48e0a2e9b2e08502d4162625cf1bb28dbff830e89598bd6fb174d85eb7abc
+size 26750976
diff --git a/params_shard_42.bin b/params_shard_42.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a37f8bbe7deea32fafe3926317bbc2f22061d4bd
--- /dev/null
+++ b/params_shard_42.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f08ce6b7f4121da0bc031f432b2c99eedc841db754388b713daeda66f3cf724d
+size 25970688
diff --git a/params_shard_43.bin b/params_shard_43.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d5d96429b0c22e4e0a2f20909b1315ce7b2f4000
--- /dev/null
+++ b/params_shard_43.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:648f8d571e206b7daeacfa9e8a6594b78bce73a42f437cb6c52ac1cf1a6f9a0d
+size 25165824
diff --git a/params_shard_44.bin b/params_shard_44.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e3f429a7384955ae72af21e6293b186e8f4c21d5
--- /dev/null
+++ b/params_shard_44.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b8431e782348ccc03201142e0393d096a1b95b960c5d148377cbf36c386e240
+size 26750976
diff --git a/params_shard_45.bin b/params_shard_45.bin
new file mode 100644
index 0000000000000000000000000000000000000000..806dfba082ee4a52c728a00400c2d471b6b15b80
--- /dev/null
+++ b/params_shard_45.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba9c8dd6589065615d95c336e403d605b3c9063f08991dd0fc313476535ab099
+size 25165824
diff --git a/params_shard_46.bin b/params_shard_46.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bef06bd06b1f40b02242e0ac4f9e31261029a533
--- /dev/null
+++ b/params_shard_46.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37eac40c22f558d87ada1e489e839bebee08665f0d7f2b54d78554c77cd8b540
+size 26750976
diff --git a/params_shard_47.bin b/params_shard_47.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b01c7345dfde444f50ac436b8afbdfcc80d99688
--- /dev/null
+++ b/params_shard_47.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:473e95307da93b7c05a0a9005be96f75e2e3f0c278ba8cceebcf9de27e6642c7
+size 25165824
diff --git a/params_shard_48.bin b/params_shard_48.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4fbec380c71cf747a8fe6673b7d540edfc4804a8
--- /dev/null
+++ b/params_shard_48.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b27c427f59b4aa9455a031d239c728c51410ae2d82cceef314e4063c86a8129
+size 26750976
diff --git a/params_shard_49.bin b/params_shard_49.bin
new file mode 100644
index 0000000000000000000000000000000000000000..198a88da8fb86e91dfb6c51f5fb630bb8a7b74a6
--- /dev/null
+++ b/params_shard_49.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b3869b59aefadcb37097c70c8facf201208509ca88a0784edb3c97f91ef1af9
+size 25165824
diff --git a/params_shard_5.bin b/params_shard_5.bin
new file mode 100644
index 0000000000000000000000000000000000000000..172c9bebe21874ddeec2a8597ccf9244d5d0bc0f
--- /dev/null
+++ b/params_shard_5.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9aae086249d34656d250a4800b52650098ddfbd08b1fc9acd293ac830e069c7f
+size 25165824
diff --git a/params_shard_50.bin b/params_shard_50.bin
new file mode 100644
index 0000000000000000000000000000000000000000..73384cc38dceb805126a6845fd1c5dcfac7d1e3e
--- /dev/null
+++ b/params_shard_50.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e096d62d352f135ce7c24d792026286a643757927067201040a81aa7b143d5fb
+size 26750976
diff --git a/params_shard_51.bin b/params_shard_51.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d99da493f21718a7ba68757eb148b83bce11b90e
--- /dev/null
+++ b/params_shard_51.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0e2e09e16b1c3515f4fd6ffb2155f95211a3f8496c4d065720d0064019ddb81
+size 25165824
diff --git a/params_shard_52.bin b/params_shard_52.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bee0d825ed1e958ad181c207aaa3e22dbeca94b5
--- /dev/null
+++ b/params_shard_52.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9d607b27a2809c534baffcb51a368501303e98ffe598855e1cb2be28fb02ee2
+size 26750976
diff --git a/params_shard_53.bin b/params_shard_53.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3da3165b9e2950ea8f6421522527db6a4045f789
--- /dev/null
+++ b/params_shard_53.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:daca6dc6657944e0afeac0fa13bd26ac310d4c9d4b5bb1c07f76d7193bd04615
+size 25165824
diff --git a/params_shard_54.bin b/params_shard_54.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1271148f6aaf33105257e9f6291c7df89fe46eda
--- /dev/null
+++ b/params_shard_54.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25c7928e5999a9894cb344977262afdf6f5871cef2ecf6f55e32915d92c1d005
+size 26750976
diff --git a/params_shard_55.bin b/params_shard_55.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f8e72ca3b434b691cc1066b4fd30b3bfa968ddbd
--- /dev/null
+++ b/params_shard_55.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8569bc589a8c7c451f669694aed8febabb5ff6db7f9ed604515bf381a4293837
+size 25165824
diff --git a/params_shard_56.bin b/params_shard_56.bin
new file mode 100644
index 0000000000000000000000000000000000000000..14aa57f161dc3e4d307017f74c090bee8979c593
--- /dev/null
+++ b/params_shard_56.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75c3cfbe3bf7daef624b1c8ecef25230c4da48edb50b3a8f8fda0eb170e1352a
+size 26750976
diff --git a/params_shard_6.bin b/params_shard_6.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cc42f85470e10d59e0f3fefeb6b66c94948079b0
--- /dev/null
+++ b/params_shard_6.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04b7e53b0e91542ec68c91c8dffc58cee6b5a686190efea6e547d7cef77473be
+size 26750976
diff --git a/params_shard_7.bin b/params_shard_7.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e66c0adf0cee7659579a3c9f1960aca60087a37d
--- /dev/null
+++ b/params_shard_7.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96c330445350e2c753ef029df75ad232539bf5bd2ae5a4cc09ccad782cc77494
+size 25165824
diff --git a/params_shard_8.bin b/params_shard_8.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a25db2c2560c4304d1e65f8221088526400d5cf2
--- /dev/null
+++ b/params_shard_8.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:252967b54f8b077149a4da331b7c77834800c16b8ee7f61f0d117c9d277feb88
+size 26750976
diff --git a/params_shard_9.bin b/params_shard_9.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fe9cec69ecdb979221d1bcbd0edb34f4ac8ef390
--- /dev/null
+++ b/params_shard_9.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d4a0d0628c755d99ae8cf8f5d5c0c59d02fd92de52bfa48f5334b1d2e893fb5
+size 25165824
diff --git a/private-llm-config.json b/private-llm-config.json
new file mode 100644
index 0000000000000000000000000000000000000000..d4ea8be6eba2583f74b9b796eb024813754a9e3b
--- /dev/null
+++ b/private-llm-config.json
@@ -0,0 +1,55 @@
+{
+ "model_type": "llama",
+ "quantization": "w4a16g128asym",
+ "model_config": {
+ "hidden_size": 3072,
+ "intermediate_size": 8192,
+ "num_attention_heads": 24,
+ "num_hidden_layers": 28,
+ "rms_norm_eps": 1e-05,
+ "vocab_size": 128256,
+ "rope_theta": 500000.0,
+ "tie_word_embeddings": true,
+ "rope_scaling": {
+ "factor": 32.0,
+ "high_freq_factor": 4.0,
+ "low_freq_factor": 1.0,
+ "original_max_position_embeddings": 8192,
+ "rope_type": "llama3"
+ },
+ "context_window_size": 4096,
+ "prefill_chunk_size": 128,
+ "num_key_value_heads": 8,
+ "head_dim": 128,
+ "tensor_parallel_shards": 1,
+ "pipeline_parallel_stages": 1,
+ "max_batch_size": 80
+ },
+ "vocab_size": 128256,
+ "context_window_size": 4096,
+ "sliding_window_size": -1,
+ "prefill_chunk_size": 128,
+ "attention_sink_size": -1,
+ "tensor_parallel_shards": 1,
+ "mean_gen_len": 128,
+ "max_gen_len": 512,
+ "shift_fill_factor": 0.3,
+ "temperature": 0.6,
+ "presence_penalty": 0.0,
+ "frequency_penalty": 0.0,
+ "repetition_penalty": 1.0,
+ "top_p": 0.9,
+ "conv_template": "llama-3",
+ "pad_token_id": 0,
+ "bos_token_id": 128000,
+ "eos_token_id": [
+ 128001,
+ 128008,
+ 128009
+ ],
+ "tokenizer_files": [
+ "tokenizer.json",
+ "tokenizer_config.json"
+ ],
+ "version": "0.1.0"
+}
\ No newline at end of file
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..52b2a02e365a89c04c6604df5ce9ba5f9c9b9bae
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f908f9b84390fd12c6d0c356765257846c53f60bf472ff4996a440a1e230373
+size 17209403
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..3893338b11eee2616bcb489b679ab1adafe64c26
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,2064 @@
+{
+ "added_tokens_decoder": {
+ "128000": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128001": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128002": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128003": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128004": {
+ "content": "<|finetune_right_pad_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128005": {
+ "content": "<|reserved_special_token_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128006": {
+ "content": "<|start_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128007": {
+ "content": "<|end_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128008": {
+ "content": "<|eom_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128009": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128010": {
+ "content": "<|python_tag|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128011": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128012": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128013": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128014": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128015": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128016": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128017": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128018": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128019": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128020": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128021": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128022": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128023": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128024": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128025": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128026": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128027": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128028": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128029": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128030": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128031": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128032": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128033": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128034": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128035": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128036": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128037": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128038": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128039": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128040": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "128041": {
+ "content": "<|reserved_special_token_33|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128042": {
+ "content": "<|reserved_special_token_34|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128043": {
+ "content": "<|reserved_special_token_35|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128044": {
+ "content": "<|reserved_special_token_36|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128045": {
+ "content": "<|reserved_special_token_37|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128046": {
+ "content": "<|reserved_special_token_38|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128047": {
+ "content": "<|reserved_special_token_39|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128048": {
+ "content": "<|reserved_special_token_40|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128049": {
+ "content": "<|reserved_special_token_41|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128050": {
+ "content": "<|reserved_special_token_42|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128051": {
+ "content": "<|reserved_special_token_43|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128052": {
+ "content": "<|reserved_special_token_44|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128053": {
+ "content": "<|reserved_special_token_45|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128054": {
+ "content": "<|reserved_special_token_46|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128055": {
+ "content": "<|reserved_special_token_47|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128056": {
+ "content": "<|reserved_special_token_48|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128057": {
+ "content": "<|reserved_special_token_49|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128058": {
+ "content": "<|reserved_special_token_50|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128059": {
+ "content": "<|reserved_special_token_51|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128060": {
+ "content": "<|reserved_special_token_52|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128061": {
+ "content": "<|reserved_special_token_53|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128062": {
+ "content": "<|reserved_special_token_54|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128063": {
+ "content": "<|reserved_special_token_55|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128064": {
+ "content": "<|reserved_special_token_56|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128065": {
+ "content": "<|reserved_special_token_57|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128066": {
+ "content": "<|reserved_special_token_58|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128067": {
+ "content": "<|reserved_special_token_59|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128068": {
+ "content": "<|reserved_special_token_60|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128069": {
+ "content": "<|reserved_special_token_61|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128070": {
+ "content": "<|reserved_special_token_62|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128071": {
+ "content": "<|reserved_special_token_63|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128072": {
+ "content": "<|reserved_special_token_64|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128073": {
+ "content": "<|reserved_special_token_65|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128074": {
+ "content": "<|reserved_special_token_66|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128075": {
+ "content": "<|reserved_special_token_67|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128076": {
+ "content": "<|reserved_special_token_68|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128077": {
+ "content": "<|reserved_special_token_69|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128078": {
+ "content": "<|reserved_special_token_70|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128079": {
+ "content": "<|reserved_special_token_71|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128080": {
+ "content": "<|reserved_special_token_72|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128081": {
+ "content": "<|reserved_special_token_73|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128082": {
+ "content": "<|reserved_special_token_74|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128083": {
+ "content": "<|reserved_special_token_75|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128084": {
+ "content": "<|reserved_special_token_76|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128085": {
+ "content": "<|reserved_special_token_77|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128086": {
+ "content": "<|reserved_special_token_78|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128087": {
+ "content": "<|reserved_special_token_79|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128088": {
+ "content": "<|reserved_special_token_80|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128089": {
+ "content": "<|reserved_special_token_81|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128090": {
+ "content": "<|reserved_special_token_82|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128091": {
+ "content": "<|reserved_special_token_83|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128092": {
+ "content": "<|reserved_special_token_84|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128093": {
+ "content": "<|reserved_special_token_85|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128094": {
+ "content": "<|reserved_special_token_86|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128095": {
+ "content": "<|reserved_special_token_87|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128096": {
+ "content": "<|reserved_special_token_88|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128097": {
+ "content": "<|reserved_special_token_89|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128098": {
+ "content": "<|reserved_special_token_90|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128099": {
+ "content": "<|reserved_special_token_91|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128100": {
+ "content": "<|reserved_special_token_92|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128101": {
+ "content": "<|reserved_special_token_93|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128102": {
+ "content": "<|reserved_special_token_94|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128103": {
+ "content": "<|reserved_special_token_95|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128104": {
+ "content": "<|reserved_special_token_96|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128105": {
+ "content": "<|reserved_special_token_97|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128106": {
+ "content": "<|reserved_special_token_98|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128107": {
+ "content": "<|reserved_special_token_99|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128108": {
+ "content": "<|reserved_special_token_100|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128109": {
+ "content": "<|reserved_special_token_101|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128110": {
+ "content": "<|reserved_special_token_102|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128111": {
+ "content": "<|reserved_special_token_103|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128112": {
+ "content": "<|reserved_special_token_104|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128113": {
+ "content": "<|reserved_special_token_105|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128114": {
+ "content": "<|reserved_special_token_106|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128115": {
+ "content": "<|reserved_special_token_107|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128116": {
+ "content": "<|reserved_special_token_108|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128117": {
+ "content": "<|reserved_special_token_109|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128118": {
+ "content": "<|reserved_special_token_110|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128119": {
+ "content": "<|reserved_special_token_111|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128120": {
+ "content": "<|reserved_special_token_112|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128121": {
+ "content": "<|reserved_special_token_113|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128122": {
+ "content": "<|reserved_special_token_114|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128123": {
+ "content": "<|reserved_special_token_115|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128124": {
+ "content": "<|reserved_special_token_116|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128125": {
+ "content": "<|reserved_special_token_117|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128126": {
+ "content": "<|reserved_special_token_118|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128127": {
+ "content": "<|reserved_special_token_119|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128128": {
+ "content": "<|reserved_special_token_120|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128129": {
+ "content": "<|reserved_special_token_121|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128130": {
+ "content": "<|reserved_special_token_122|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128131": {
+ "content": "<|reserved_special_token_123|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128132": {
+ "content": "<|reserved_special_token_124|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128133": {
+ "content": "<|reserved_special_token_125|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128134": {
+ "content": "<|reserved_special_token_126|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128135": {
+ "content": "<|reserved_special_token_127|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128136": {
+ "content": "<|reserved_special_token_128|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128137": {
+ "content": "<|reserved_special_token_129|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128138": {
+ "content": "<|reserved_special_token_130|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128139": {
+ "content": "<|reserved_special_token_131|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128140": {
+ "content": "<|reserved_special_token_132|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128141": {
+ "content": "<|reserved_special_token_133|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128142": {
+ "content": "<|reserved_special_token_134|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128143": {
+ "content": "<|reserved_special_token_135|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128144": {
+ "content": "<|reserved_special_token_136|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128145": {
+ "content": "<|reserved_special_token_137|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128146": {
+ "content": "<|reserved_special_token_138|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128147": {
+ "content": "<|reserved_special_token_139|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128148": {
+ "content": "<|reserved_special_token_140|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128149": {
+ "content": "<|reserved_special_token_141|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128150": {
+ "content": "<|reserved_special_token_142|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128151": {
+ "content": "<|reserved_special_token_143|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128152": {
+ "content": "<|reserved_special_token_144|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128153": {
+ "content": "<|reserved_special_token_145|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128154": {
+ "content": "<|reserved_special_token_146|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128155": {
+ "content": "<|reserved_special_token_147|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128156": {
+ "content": "<|reserved_special_token_148|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128157": {
+ "content": "<|reserved_special_token_149|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128158": {
+ "content": "<|reserved_special_token_150|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128159": {
+ "content": "<|reserved_special_token_151|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128160": {
+ "content": "<|reserved_special_token_152|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128161": {
+ "content": "<|reserved_special_token_153|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128162": {
+ "content": "<|reserved_special_token_154|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128163": {
+ "content": "<|reserved_special_token_155|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128164": {
+ "content": "<|reserved_special_token_156|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128165": {
+ "content": "<|reserved_special_token_157|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128166": {
+ "content": "<|reserved_special_token_158|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128167": {
+ "content": "<|reserved_special_token_159|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128168": {
+ "content": "<|reserved_special_token_160|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128169": {
+ "content": "<|reserved_special_token_161|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128170": {
+ "content": "<|reserved_special_token_162|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128171": {
+ "content": "<|reserved_special_token_163|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128172": {
+ "content": "<|reserved_special_token_164|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128173": {
+ "content": "<|reserved_special_token_165|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128174": {
+ "content": "<|reserved_special_token_166|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128175": {
+ "content": "<|reserved_special_token_167|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128176": {
+ "content": "<|reserved_special_token_168|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128177": {
+ "content": "<|reserved_special_token_169|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128178": {
+ "content": "<|reserved_special_token_170|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128179": {
+ "content": "<|reserved_special_token_171|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128180": {
+ "content": "<|reserved_special_token_172|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128181": {
+ "content": "<|reserved_special_token_173|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128182": {
+ "content": "<|reserved_special_token_174|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128183": {
+ "content": "<|reserved_special_token_175|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128184": {
+ "content": "<|reserved_special_token_176|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128185": {
+ "content": "<|reserved_special_token_177|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128186": {
+ "content": "<|reserved_special_token_178|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128187": {
+ "content": "<|reserved_special_token_179|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128188": {
+ "content": "<|reserved_special_token_180|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128189": {
+ "content": "<|reserved_special_token_181|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128190": {
+ "content": "<|reserved_special_token_182|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128191": {
+ "content": "<|reserved_special_token_183|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128192": {
+ "content": "<|reserved_special_token_184|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128193": {
+ "content": "<|reserved_special_token_185|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128194": {
+ "content": "<|reserved_special_token_186|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128195": {
+ "content": "<|reserved_special_token_187|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128196": {
+ "content": "<|reserved_special_token_188|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128197": {
+ "content": "<|reserved_special_token_189|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128198": {
+ "content": "<|reserved_special_token_190|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128199": {
+ "content": "<|reserved_special_token_191|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128200": {
+ "content": "<|reserved_special_token_192|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128201": {
+ "content": "<|reserved_special_token_193|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128202": {
+ "content": "<|reserved_special_token_194|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128203": {
+ "content": "<|reserved_special_token_195|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128204": {
+ "content": "<|reserved_special_token_196|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128205": {
+ "content": "<|reserved_special_token_197|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128206": {
+ "content": "<|reserved_special_token_198|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128207": {
+ "content": "<|reserved_special_token_199|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128208": {
+ "content": "<|reserved_special_token_200|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128209": {
+ "content": "<|reserved_special_token_201|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128210": {
+ "content": "<|reserved_special_token_202|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128211": {
+ "content": "<|reserved_special_token_203|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128212": {
+ "content": "<|reserved_special_token_204|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128213": {
+ "content": "<|reserved_special_token_205|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128214": {
+ "content": "<|reserved_special_token_206|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128215": {
+ "content": "<|reserved_special_token_207|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128216": {
+ "content": "<|reserved_special_token_208|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128217": {
+ "content": "<|reserved_special_token_209|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128218": {
+ "content": "<|reserved_special_token_210|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128219": {
+ "content": "<|reserved_special_token_211|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128220": {
+ "content": "<|reserved_special_token_212|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128221": {
+ "content": "<|reserved_special_token_213|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128222": {
+ "content": "<|reserved_special_token_214|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128223": {
+ "content": "<|reserved_special_token_215|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128224": {
+ "content": "<|reserved_special_token_216|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128225": {
+ "content": "<|reserved_special_token_217|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128226": {
+ "content": "<|reserved_special_token_218|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128227": {
+ "content": "<|reserved_special_token_219|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128228": {
+ "content": "<|reserved_special_token_220|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128229": {
+ "content": "<|reserved_special_token_221|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128230": {
+ "content": "<|reserved_special_token_222|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128231": {
+ "content": "<|reserved_special_token_223|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128232": {
+ "content": "<|reserved_special_token_224|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128233": {
+ "content": "<|reserved_special_token_225|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128234": {
+ "content": "<|reserved_special_token_226|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128235": {
+ "content": "<|reserved_special_token_227|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128236": {
+ "content": "<|reserved_special_token_228|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128237": {
+ "content": "<|reserved_special_token_229|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128238": {
+ "content": "<|reserved_special_token_230|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128239": {
+ "content": "<|reserved_special_token_231|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128240": {
+ "content": "<|reserved_special_token_232|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128241": {
+ "content": "<|reserved_special_token_233|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128242": {
+ "content": "<|reserved_special_token_234|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128243": {
+ "content": "<|reserved_special_token_235|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128244": {
+ "content": "<|reserved_special_token_236|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128245": {
+ "content": "<|reserved_special_token_237|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128246": {
+ "content": "<|reserved_special_token_238|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128247": {
+ "content": "<|reserved_special_token_239|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128248": {
+ "content": "<|reserved_special_token_240|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128249": {
+ "content": "<|reserved_special_token_241|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128250": {
+ "content": "<|reserved_special_token_242|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128251": {
+ "content": "<|reserved_special_token_243|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128252": {
+ "content": "<|reserved_special_token_244|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128253": {
+ "content": "<|reserved_special_token_245|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128254": {
+ "content": "<|reserved_special_token_246|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128255": {
+ "content": "<|reserved_special_token_247|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|begin_of_text|>",
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|im_end|>",
+ "legacy": false,
+ "model_input_names": [
+ "input_ids",
+ "attention_mask"
+ ],
+ "model_max_length": 131072,
+ "pad_token": "<|end_of_text|>",
+ "tokenizer_class": "PreTrainedTokenizerFast"
+}