diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index d6cac6e5968b2d90fcea19d0358da9f05dba3c91..27f439e59e6bf391255bce67eef3415caaeb15ad 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ --- license: gemma +base_model: google/gemma-2-9b-it +base_model_relation: quantized +library_name: mlc-llm +pipeline_tag: text-generation --- + +4-bit [OmniQuant](https://arxiv.org/abs/2308.13137) quantized version of [gemma-2-9b-it](https://huggingface.co/google/gemma-2-9b-it). diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d9e7e08f5c8e277065c653a7c72987d064f55208 --- /dev/null +++ b/config.json @@ -0,0 +1,5 @@ +{ + "quantization_config": { + "bits": 4 + } +} diff --git a/mlc-chat-config.json b/mlc-chat-config.json new file mode 100644 index 0000000000000000000000000000000000000000..a164ac7bcbf299703c449b3ec2742013d59395bc --- /dev/null +++ b/mlc-chat-config.json @@ -0,0 +1,84 @@ +{ + "version": "0.1.0", + "model_type": "gemma2", + "quantization": "w4a16g128", + "model_config": { + "hidden_size": 3584, + "intermediate_size": 14336, + "attention_bias": false, + "num_attention_heads": 16, + "num_key_value_heads": 8, + "head_dim": 256, + "num_hidden_layers": 42, + "rms_norm_eps": 1e-06, + "vocab_size": 256000, + "hidden_activation": "gelu_pytorch_tanh", + "position_embedding_base": 10000.0, + "context_window_size": 4096, + "prefill_chunk_size": 128, + "tensor_parallel_shards": 1, + "max_batch_size": 128, + "attn_logit_softcapping": 50.0, + "final_logit_softcapping": 30.0, + "query_pre_attn_scalar": 256, + "sliding_window": 4096 + }, + "vocab_size": 256000, + "context_window_size": 4096, + "sliding_window_size": -1, + "prefill_chunk_size": 128, + "attention_sink_size": -1, + "tensor_parallel_shards": 1, + "pipeline_parallel_stages": 1, + "temperature": 1.0, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "repetition_penalty": 1.0, + "top_p": 1.0, + "tokenizer_files": [ + "tokenizer.model", + "tokenizer.json", + "tokenizer_config.json" + ], + "tokenizer_info": { + "token_postproc_method": "byte_fallback", + "prepend_space_in_encode": false, + "strip_space_in_decode": false + }, + "conv_template": { + "name": "gemma_instruction", + "system_template": "{system_message}", + "system_message": "", + "system_prefix_token_ids": [ + 2 + ], + "add_role_after_system_message": true, + "roles": { + "user": "user", + "assistant": "model" + }, + "role_templates": { + "user": "{user_message}", + "assistant": "{assistant_message}", + "tool": "{tool_message}" + }, + "messages": [], + "seps": [ + "\n" + ], + "role_content_sep": "\n", + "role_empty_sep": "\n", + "stop_str": [ + "" + ], + "stop_token_ids": [ + 1, + 107 + ], + "function_string": "", + "use_function_calling": false + }, + "pad_token_id": 0, + "bos_token_id": 2, + "eos_token_id": 1 +} \ No newline at end of file diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..82e44282542413c188097437ec36869b382c4f65 --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,6425 @@ +{ + "metadata": { + "ParamSize": 507, + "ParamBytes": 4766153728.0, + "BitsPerParam": 4.068522985105678 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 458752000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 256000, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 458752000, + "byteOffset": 0 + } + ], + "md5sum": "85ddc730c8a7f5f548d3e5857de426bc" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "00cf549b61c1818f57c5da869563eff9" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "5a6b42787fe8cceb9747edffc2cec6a3" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 31911936, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 256000, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336000, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 14336000 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 14343168 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 15145984 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 16751616 + }, + { + "name": "model.layers.0.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 16758784 + }, + { + "name": "model.layers.0.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 16765952 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 16773120 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 31453184 + } + ], + "md5sum": "7f0bc91f877db774d4cafe2f064d73cf" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33266688, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 7340032 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 7569408 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 7576576 + } + ], + "md5sum": "2dfd2bcb95e0e89bf975628108465d29" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "7b9c73b7945927ac0a375c8229da52ad" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "67a8610cdbc9077e829cab0f10b167a6" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "740225e4ce9ce4d779d6d2f3b0e2ea40" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 27575296, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 802816 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2408448 + }, + { + "name": "model.layers.1.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2415616 + }, + { + "name": "model.layers.1.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2422784 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 2429952 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 17110016 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 17568768 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 24908800 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 25145344 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 25948160 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27553792 + }, + { + "name": "model.layers.2.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27560960 + }, + { + "name": "model.layers.2.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27568128 + } + ], + "md5sum": "a749e2c09149d8b3f0b1a8663b7e17cd" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "22a4e019a6f0d56a494242bb00085969" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "f18c1ed4244a269bb187d0fe6c5d1411" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.3.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.3.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "192150e174eb20db725bd04eb85f4b6e" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "3d9fa73c791524180aa7a1e71ea9b0ed" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "25051002a3d2eb08b49fea307a3db9f6" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.4.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.4.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "352e5c58e68e9e5d806ce77178fc0674" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "ee25040a305fb4f67dd8656388290779" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "4abe092da1fecce188dd6669192ec8cc" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.5.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.5.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "b704819d8ab9ef6cac7c5a936bf0870e" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "d30fcec49d243cb163439d6ed2e5a888" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "4d6c7153ff08f2ae4ab79f331481cbab" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.6.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.6.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "74347901b831a39d74d917b4d3cf3643" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "88a48a340d8102bb5a61ebbc97c31c6a" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 24313856, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 22708224 + } + ], + "md5sum": "6ada8676816106d6786323d93e798857" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "252bea5480011d17cc00192ad847f9a8" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "a1c510c0a0547085b682bbe3c49ed175" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.10.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.10.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "58df2a73346145abe870e46d7d782cc1" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "89d467a609151e2afbe6c7fb0f1f75ea" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "54653e042c5daa40e17b8c961b5d18d0" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.11.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.11.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "d817777c1e8e5f0a5e80b4bbafd9aeb8" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "659f91e7ad93b8c7f70cf11c7a255f4e" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "519fe6793be6039dddc8736a975c123d" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.12.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.12.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "77d7b6204515535e0293eb6765cd064a" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "8d12bf225ae00fe6b39d99f0f81c633b" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "3fb2f7254ab06c61ca3e1899e05db753" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.13.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.13.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "0f308864db5827a294ae1694a422f861" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "d189726509ccb3049c17e6bfc8d34dc8" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "759a1d4a26130b78dfe3cc369819066b" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.14.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.14.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "a14729ba0e6a991a57c1c9b93f6f4fef" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "89d06177461be77c3488639e0dae4a1b" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "84124abc59ca33f87c9c315dfdd8320a" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.15.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.15.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "01e5f1f36f7d0930efcbb825ba44d8be" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "53489b9553f5eb8d802b3f0f3046d8c0" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "d778563f031247d4d7227f5d74119569" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.16.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.16.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "630f3d721c171b0a26ed6a3591ba69b2" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "5462cbc19420547a2e6b54f0190163ff" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "c2e9d8488708770e04bd94d5a9d82a33" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.17.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.17.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "aaeb47576326ac0bf2eb6f9a7eaf934a" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "87cc6b6ec3f8d2d7677b30f0280c0e2c" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "243154c8fac8e9110feabbf80199a322" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.18.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.18.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "b2efbee647d1954e05a7dbb984ce87b1" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "d91cfe945ceb15146455e8184d68d6e6" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "ce3ecc8672c5e39aa31fe4e326828901" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.19.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.19.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "1585c624b1b6fe4c6afbed42c1eb2d4b" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + } + ], + "md5sum": "38cf6f88ea787bdd3f91e89bc8be7c84" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "ae4bb8b8a7695d061d943cc125a2851e" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "30e4d679ee723d6bacf1a8bf80fc5f87" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "42fb8f55b79b54bf34cc95d7a6dbda8c" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 33546240, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 15138816 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 15145984 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 15948800 + }, + { + "name": "model.layers.7.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 15955968 + }, + { + "name": "model.layers.7.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 15963136 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 15970304 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 15977472 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 16780288 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 18385920 + }, + { + "name": "model.layers.8.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 18393088 + }, + { + "name": "model.layers.8.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 18400256 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 18407424 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 33087488 + } + ], + "md5sum": "0e7a25eecbf0a16fa6689e814d410c89" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 33266688, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 7340032 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 7569408 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 7576576 + } + ], + "md5sum": "cc52fa7d8e932e6bee45c0c7f0b2261c" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "d65ecda28c7b24f1b745d9c93d40f651" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "5c161d2e18bc9f622d8cf537c5bde66a" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "26bb4aeaee24f0b71665b852c969e977" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 27575296, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 802816 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2408448 + }, + { + "name": "model.layers.9.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2415616 + }, + { + "name": "model.layers.9.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2422784 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 2429952 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 17110016 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 17568768 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 24908800 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 25145344 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 25948160 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27553792 + }, + { + "name": "model.layers.20.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27560960 + }, + { + "name": "model.layers.20.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27568128 + } + ], + "md5sum": "94cf7d0649a61224cf200241da97244d" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 33266688, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 7340032 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 7569408 + }, + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 7576576 + } + ], + "md5sum": "a2fbd7e52d56417acbff44487adbe471" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "b977640bef40db16ad1c84ebe17b5db7" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "38a11314fb661334854f52db656cf840" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "994b44ed5b1d4a821270961d52fe7f50" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 27575296, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 802816 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2408448 + }, + { + "name": "model.layers.21.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2415616 + }, + { + "name": "model.layers.21.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2422784 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 2429952 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 17110016 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 17568768 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 24908800 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 25145344 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 25948160 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27553792 + }, + { + "name": "model.layers.22.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27560960 + }, + { + "name": "model.layers.22.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27568128 + } + ], + "md5sum": "d7ccb407731355f65d0d12695ad945ec" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "2ab13ccb329fe5d8d7adcf76cdf6651f" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "176537d806f5012f5f2f69f6caa17c3a" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.23.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.23.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "3c1a33c19abb6ce4793f5762365a8268" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "abc84d06a0e9ca1df8a3bd90137ced12" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "e33906afaa8e93d4d266a476bf527613" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.24.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.24.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "fd236653e334caaa5ac2869c2143f596" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "eeecdf12f947b2fc08a66eca7b12b5b6" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "6981fd2af8b4ce8ee8be92fdc1d46136" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.25.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.25.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "e16c5824c9f2b85abd83b6817315d2c2" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "e00aaf716b3ddd7f9f837fd83822caaa" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "518a4983e7c1f0da64acbab345902928" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.26.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.26.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "3a2dde70f6a32434a096980643755b08" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "ac84c6d006ae3b01be8555c35e5cc5f0" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "799a6f4686a01746670a76ee142c22a1" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.27.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.27.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "70e897415fb1af9b52b166a883267c08" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "c215174121b5033f3d9d8ae0c673b0b5" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "e200e77a4f24adb199a86ce1ddccca57" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.28.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.28.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "ac59b121fccdba84d17187ddd32dfac5" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "d4dd7a11bf98c9d4a7949b6ceda9526d" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "36085b823838fa32444466855c45e0ee" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.29.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.29.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "e9b60154b3993ccf410cb9f296dd23c4" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "1dbd1de258659ab0d7c03d2995127b13" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "97957e8b10f5c00ee82307b0d63022ce" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.30.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.30.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "8fed37e94f59ffda8177b898bebc8844" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "82ac10ed46a69f6f5a897aec65b3cce5" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "a187852a1fa995501041923b0ec3e309" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.31.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.31.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "a535a4db95581ec3b05bf7765747e7be" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "0b67c3c3da3e7aff707bb0025d47541b" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 24313856, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 22708224 + } + ], + "md5sum": "3d9122887d019cecad6c80ab4349a41a" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "d169e96c6657a21ff66f583699b0d808" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "978a8c31d2afa23925dbccdc39f17151" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "8f17dc58583e2de39873221e43fa4085" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 25976832, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 23518208 + }, + { + "name": "model.layers.32.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 23525376 + }, + { + "name": "model.layers.32.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 23532544 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 23539712 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 23546880 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 24349696 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25955328 + }, + { + "name": "model.layers.33.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25962496 + }, + { + "name": "model.layers.33.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25969664 + } + ], + "md5sum": "53dc0ec7db6bb87a6dab3d6cf37dec8f" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "fd404359d91c6e2628d5d81ff72f1d72" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "c0831e2b894e36a9d94bc53b3c1c54e5" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.34.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.34.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "a5c16a6bba9e707e82a9b41cbca5e8a7" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "837f40900b1a6981322daf53f301c18f" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "95072e4e6bcc56d489b668cc1325f0e1" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.35.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.35.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "2c2455bffc1c5df5bf708f1d45b8f9f4" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "713007a1abc77367e1e77c6e97fc2536" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "ad92dce884724fe904d185e28023b130" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.36.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.36.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "22c179c2f9fef29359f51eda8e1c1734" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "d28b236cdd1eac79e15fff3edad7fa3d" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "853ab05423b3618c391e984201bdf04c" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.37.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.37.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "e16e829a3c5aea1929de40d1ddf081b4" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "ad5719c33b79273152a5ab80d056a196" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "4a9c21d34321eb7b32850573a2dbc4f6" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.38.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.38.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "3336a553cd8ea312581a5b6b294e1cf2" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "2040b5b801732a019733d4c603a68df7" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "14d1b3b6a04d773b5fadcc904400a0d8" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.39.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.39.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "4aff3d4c4e44f852029fa708f04c8133" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "00f0ccd0a30e90f0c12afc50dd894851" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "33f1d394579d76fef312bec84dbd8f15" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.40.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.40.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "17134e26594d6d357717ed463c3d614b" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 1792, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "d572bcebaf4ba71c337ad3b29a9175f5" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 448, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "beacc186df3409bf63892704b35bed5c" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.40.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 112, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 28, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.41.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.41.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "1a5603b7c759b444c7ed3d87a32f53a4" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 22715392, + "records": [ + { + "name": "model.layers.41.self_attn.qkv_proj.q_weight", + "shape": [ + 448, + 8192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.41.self_attn.qkv_proj.q_scale", + "shape": [ + 28, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 512, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 32, + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.norm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + } + ], + "md5sum": "e5b3d461a88f0503aaba10b62fa92004" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..80a8d2b9bdc231e72c319ac5e134452c04a1a937 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f7f2db24c4f29a7cd63f3ca1ce933f6836f746795ca8b174037b70410b4c927 +size 458752000 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..57c0b5fc21cf13e8c6feca3df3bc9a68eb4c6293 --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b299b226a2dab9e6b8bced2d82ff1822617a08fe3cda399dd3f306acb5e5864c +size 25690112 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..cf195326878e488fcc13a56356ed74625e428b5b --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c97a4eea5493443135fb243c1fe7ae0bcfd663209b0207a0b85901cba654ea7d +size 51380224 diff --git a/params_shard_100.bin b/params_shard_100.bin new file mode 100644 index 0000000000000000000000000000000000000000..34dec829dfab5eededf1e79d5b4b6ed45ef04299 --- /dev/null +++ b/params_shard_100.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e80aa15cc4bd4e96e844f0490eae7c69c5244b3836b08a4b5f26e5c6d9434100 +size 25976832 diff --git a/params_shard_101.bin b/params_shard_101.bin new file mode 100644 index 0000000000000000000000000000000000000000..8c6f771ae635c5c0e67c0f9e70fef16e16871cd1 --- /dev/null +++ b/params_shard_101.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:828b910b86d99de04273eaca8f0febb0e4b05c7afef665be3ae5a5a95dd316fe +size 25690112 diff --git a/params_shard_102.bin b/params_shard_102.bin new file mode 100644 index 0000000000000000000000000000000000000000..f6231f66a82a5ce56abcb2aac0a8ed53c7d51692 --- /dev/null +++ b/params_shard_102.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e0d35133aaa438ae704dd20bdf4ebc4a62921cbd912585b404cfa75a2ec4b2a +size 51380224 diff --git a/params_shard_103.bin b/params_shard_103.bin new file mode 100644 index 0000000000000000000000000000000000000000..83b67612206f6502886c4ebb4b00710046cc6714 --- /dev/null +++ b/params_shard_103.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f896eabdb3290a7c6a3e05788dea9b49e63063a84751d8245605370af5825103 +size 25145344 diff --git a/params_shard_104.bin b/params_shard_104.bin new file mode 100644 index 0000000000000000000000000000000000000000..5b4b530dfaa25a57ffceac5afbb2e5114ecac151 --- /dev/null +++ b/params_shard_104.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b20454daf1d5895019afc6fac56f3a65e3cfc8b27331b481e10926204c180d3 +size 25690112 diff --git a/params_shard_105.bin b/params_shard_105.bin new file mode 100644 index 0000000000000000000000000000000000000000..1d659e4cfb22abdca225d6d2715db87c7351b67c --- /dev/null +++ b/params_shard_105.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f1da35ee268cc58474019f7c828eaef61e91a1e8f3852ed0f127c43b0d7017e +size 51380224 diff --git a/params_shard_106.bin b/params_shard_106.bin new file mode 100644 index 0000000000000000000000000000000000000000..90602ccb08ae8907612f27dd077edb809beb6f31 --- /dev/null +++ b/params_shard_106.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f94163933098d93dbc130ab77a6610ed9cc0c19d828a34502c7a977d4fd5b67e +size 25145344 diff --git a/params_shard_107.bin b/params_shard_107.bin new file mode 100644 index 0000000000000000000000000000000000000000..319c3c8b766c8ca6b6a16270fcfae7453d201717 --- /dev/null +++ b/params_shard_107.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f21c1f24792e8214d0bb4d4a1830b293661a59afad1d21dc58eac7b99d303e20 +size 25690112 diff --git a/params_shard_108.bin b/params_shard_108.bin new file mode 100644 index 0000000000000000000000000000000000000000..67eec1d06be9a2a66a1a8ac05fe9bfc3d8f840b8 --- /dev/null +++ b/params_shard_108.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb0ac32bf65ba957d529812bcf3095482601ab41c471ab44c43e642edebe04b +size 51380224 diff --git a/params_shard_109.bin b/params_shard_109.bin new file mode 100644 index 0000000000000000000000000000000000000000..2d5685c8811cf8dccd2c0b16c10d67364e9bad04 --- /dev/null +++ b/params_shard_109.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1856b2c79f3ca987c8481992853ce0b8b9a184ea8eb25a80fe37823a9b6119ff +size 25145344 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..b7008ffcb990c56d3fb6c174b2002e4a4d88589d --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:117bbafc952d0ff02c85655ca421dabebfd386490dfba5e32532d167216473e7 +size 25145344 diff --git a/params_shard_110.bin b/params_shard_110.bin new file mode 100644 index 0000000000000000000000000000000000000000..b49eb1d5cc4110a8cd538a0697421532e298c028 --- /dev/null +++ b/params_shard_110.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b7ec1d0c2f397c3b3fc6426975fd8c6104515f18c716155ea2f54733a5f6b08 +size 25690112 diff --git a/params_shard_111.bin b/params_shard_111.bin new file mode 100644 index 0000000000000000000000000000000000000000..25931d5a7f56a10985e0d1b197ce1ab80a97ea33 --- /dev/null +++ b/params_shard_111.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c8ae8abe8f88d14bb3dc9d8d82ab5a9c95efc751a876bda1bc26a8359c07948 +size 51380224 diff --git a/params_shard_112.bin b/params_shard_112.bin new file mode 100644 index 0000000000000000000000000000000000000000..39b12e88ddf9e10312a9e1e52529d12fcc35b83d --- /dev/null +++ b/params_shard_112.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:636642ceb866a19bc97196e0dcfba9f0c4475107ac727d0cf9f27e29ecf65a7b +size 25145344 diff --git a/params_shard_113.bin b/params_shard_113.bin new file mode 100644 index 0000000000000000000000000000000000000000..fdeb81b568a8b1bf9676004a68b1778547f9c6da --- /dev/null +++ b/params_shard_113.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c873209052d13776c340464802bc3b18d14ae44ff0be7f43164b5ac13430babe +size 25690112 diff --git a/params_shard_114.bin b/params_shard_114.bin new file mode 100644 index 0000000000000000000000000000000000000000..ce316c2313457770a67502f1ab6dd95dc5b93d11 --- /dev/null +++ b/params_shard_114.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f611b9227c72b0b2e65264889490886614e806e906d65cc15b8210aa5c91b6be +size 51380224 diff --git a/params_shard_115.bin b/params_shard_115.bin new file mode 100644 index 0000000000000000000000000000000000000000..a2e055ac55f7d1905aefa1dba75dd50eadf3f6ee --- /dev/null +++ b/params_shard_115.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cba2a9842712c53fd5e95591545d8ba4bd41a634f520d61d2d74ef8adccb004 +size 25145344 diff --git a/params_shard_116.bin b/params_shard_116.bin new file mode 100644 index 0000000000000000000000000000000000000000..c439ce8e5e705a6d66fa998e10ae8fb213ed887d --- /dev/null +++ b/params_shard_116.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:081d552340ea534950ed392b29d0ee887afca142a41a5062416cca346ece84fb +size 25690112 diff --git a/params_shard_117.bin b/params_shard_117.bin new file mode 100644 index 0000000000000000000000000000000000000000..900344833587c05bd568e58095bef069ce4b87a9 --- /dev/null +++ b/params_shard_117.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3073e32e3e1782b21e753abe75ed2d52785df33bff19d736b9db1debb70252cd +size 51380224 diff --git a/params_shard_118.bin b/params_shard_118.bin new file mode 100644 index 0000000000000000000000000000000000000000..8f8d4ef1efa4eb438b184feae74161c40a8eaa01 --- /dev/null +++ b/params_shard_118.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4435aaf4b535a732da15562fdead7f15bfc2f2552572ebef9aa27ac72139bf6c +size 25145344 diff --git a/params_shard_119.bin b/params_shard_119.bin new file mode 100644 index 0000000000000000000000000000000000000000..a6d77a7844403f78af8902ee5e35fad92163094d --- /dev/null +++ b/params_shard_119.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d1e245ef7e88171bfc377811a8d086a49500971ad9ef0d71970d30ca768e94e +size 25690112 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fb57eec88806280c6567bbe817d665295db54fe --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04524f9903f7b23f39e14066377fe59071bcfa5ab6573ac854152b135d06bb33 +size 25690112 diff --git a/params_shard_120.bin b/params_shard_120.bin new file mode 100644 index 0000000000000000000000000000000000000000..b756b9614b5cb9b49af454adfdaf2060e6ba4e6e --- /dev/null +++ b/params_shard_120.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfa6f1794a5dc18a0dca1e852bcd7755816690fe34040c3d6eeede00e4dd220c +size 51380224 diff --git a/params_shard_121.bin b/params_shard_121.bin new file mode 100644 index 0000000000000000000000000000000000000000..31770d67a44a94b238ac29596b0aa2a14e010c3f --- /dev/null +++ b/params_shard_121.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb322be314a60dbead05f313a4bb61b3ad02a939b41e1d7f302320bb1db658e2 +size 25145344 diff --git a/params_shard_122.bin b/params_shard_122.bin new file mode 100644 index 0000000000000000000000000000000000000000..b6cb7b53df352396b64ac5b0b1cf423013b58325 --- /dev/null +++ b/params_shard_122.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21d0ebb474eccaa7bed34d70ee8be149cc486dd2aa89902d8bae525864d38d1e +size 25690112 diff --git a/params_shard_123.bin b/params_shard_123.bin new file mode 100644 index 0000000000000000000000000000000000000000..5bdaea70e7f565a6a02d5d893b2abc4171d33777 --- /dev/null +++ b/params_shard_123.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:500e9cc2170acf419797427c18fa2f2d94e2b16c13e8a81db9f3031267ecfafc +size 51380224 diff --git a/params_shard_124.bin b/params_shard_124.bin new file mode 100644 index 0000000000000000000000000000000000000000..b08e61d1b2861390c536a8efedefcfed4174e2d6 --- /dev/null +++ b/params_shard_124.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd930914a06650d248f9f81542bdc348e1ec8a93289faff87c44c18086d5592d +size 25145344 diff --git a/params_shard_125.bin b/params_shard_125.bin new file mode 100644 index 0000000000000000000000000000000000000000..106404d33aa238127d192cafc7b2cf2c78a560fc --- /dev/null +++ b/params_shard_125.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2f4b46d28fbdaf8a17264ef1054e02ebc85ad265f834baa0f686c36d626418c +size 22715392 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..dea289bb1d783b8abaa1df43441ff1dd5dbab23c --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8de97eaecc00ba0671df350abb1eee16ad3952c1f33931d171479b15bcc5b873 +size 51380224 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..9436aeaf95f42d36b1430e1fa5c69065f1254731 --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08d0d1ab243a733a055e5c4388358b9aa3c89afb4197fbb103cdd5353a54b803 +size 25145344 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..e744df17f6acb1cf50676ab03f25d1e3dcf0d9b5 --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fe0e4f6f8698f52aee1e973ce1d4805001feeeb270a96872b0968c796269b21 +size 25690112 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb248226dcae6b7daa6330cc377a8ba21e2bcffa --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43e4b5a2880527a557c61e5ec8d7675c167e42e637d08f3c9aa20536733ee691 +size 51380224 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..71084ac2203ef22f0bee098dcba9cb6c23d8c187 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd3bc4a05e7a6472885448d463a9dfc52d0bafad13584582ff6f756526af5652 +size 25145344 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..2eb366e08d7f46306b5f3ca1f1899e91998977e7 --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2fbf7b24059c1ca81d6b672f3e1c903b9147e8379042518053f7a8872b551b7 +size 25690112 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b744c441bb71f3065d70dedb2bf851e3e08f48b --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dad5b8a9f17ef32bc45753d980514dba3316bbec2c4f38d8eff6decee84e96f +size 51380224 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd1afbf338e40d0916b6b7b546c70294085d1f3c --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aefce0d8c44996cc538fd6a67f61ac197b26fa2ec41d0ba366db28adfbc3d47 +size 51380224 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea14fb3876282b73e9db6b606047672a7c51ff51 --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe79d02cc6a27929cd45c974e8fdcee28ae5ce53c6a641428395133d178a2423 +size 25145344 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..1070538109a469ea7580b9a4d658502ce4598c97 --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0f83f2ca6d65f6fefa617b4f43b91dd3aefe9a5f7351a7cced59e65d88e1a02 +size 51380224 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..9e6b296d5b82f593c5dd058e8d7ad517b6c52b95 --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cf2ea5830ebc3c4b609d1030713eaceb2d7c39f96d2f048ef24bfd533b4db6a +size 24313856 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..07cb4daa5f1c1e9486d5206d7f5a7f173901a031 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c38be80ab0b55e7f5111e10ae67fec4de07bb73192718073582f418a932806d +size 25690112 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..b049c8643687a3b35e354981ffd6ecd09eff574b --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2101db93c8b842aa3bd963da327c810536666a1feb97398d69c52eb9c2e78a59 +size 51380224 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..f1424a3340a2a4ff67c0ebd7260158da492cef2f --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d7e0609a9e632b8883ba0841d6fd6dcd431e9c3d293221eebfdbd1c3aa3b6f8 +size 25145344 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..8390e865c6568dd3db5cb8fe6dfbdea3dd29ec1b --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bfa89e39453375e1e2726e2c2b8dd211e4313824e8a0c0de613806fed3665b3 +size 25690112 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..4fd78422deb75d3a2746fdc4da582d1333504a2a --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bec77aa11afb240fc90ceaf80f5ab2da8f2c7f80b948e1332d18dfea3ee82624 +size 51380224 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..55dbf9117a1126cf4fe501cc99c43de0a0d34df9 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:632e46457a4ae5fab48e1bc7c341e1a638864cc941637014363446e7b206663a +size 25145344 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..162136438e8c0011a3c12f57720ac01fc29f741a --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c17c1a2b7462f3ae46d28e3de1e23e8f13a241bcceb5cc97d15c55d6840f7fba +size 25690112 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..50f42f81562d6b1471f7234c1dd987cba529a4af --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58f653362b2213e707de812a73cbcb3a65ff2d7f7f9a32cb7ba586aaf133b44a +size 31911936 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..7995b468ab70068e9d941b45421903deaac02be6 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7dd90338a67570de4086ba218c48dab5b8b3b459dade82b3f364e0c9dca5668 +size 51380224 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..fa9845b8bcbf6ba61b475d611590734ec3550224 --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e43439e9a9fd268562183557b433de8518155953bae18f3ccd5d73c9214c9b5 +size 25145344 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..f346c98acc09f490ceefddea7e32ee38d969fe5c --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96975f88e0f7ed27cd3d79adde4f9b4cc2900d17b0b2b52e95f5bbc044a7a848 +size 25690112 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..2775e9f598e390ae45a3669ff219389a0295f602 --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7d8e2f843c5b9ba3ebebad4fc82c26c6832f1ae6adfcd4e27817bc79bdde47c +size 51380224 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..3badb29b27a02b3b9b12c12afd320ff8cd4695b4 --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79d6a2556485dc85b2113a342971217892af854354b71b721bf666534a37ac14 +size 25145344 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..37de9111aad609851f20130cb0247a49c294fc26 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cad19fb6d29144073e98f01cd536f2f6b98ef179013ff712a309e32dd79f80e +size 25690112 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..eaf128819abec82b57a1d77cd705c49c3ff669c5 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:788ecd716ec98e9b320e0e0a33347d8effe71531e7fc3f2c28b6dda6659050dd +size 51380224 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..563189d02f60fdaf9a9c70ec0753665152ed9168 --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a668ad07d60c789b99bf6d964fcd635f1a437d3425680c849d6a571914aac2b +size 25145344 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..b667fe22890283e54720d802fafa35a640370133 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6609efe7da5bf628c07ddad37b4c990aef5aec4f1b9a356fcbcd31a8b9ee53ba +size 25690112 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..236b3d4e1aa8cdecd129d23b6398a20fbd95e7ed --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e42a15b1c5ed8c88a6e49198406957ca3a1df38803bbe9c6c95bc59de276090e +size 51380224 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..46f3fb469183f1b978da5ad428f98145c6312afa --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4b89a81dd9d55f3288ac489106bd556ed5fa119018477be763823d7f9dd813 +size 33266688 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..ae38884c7c9a93a51ba7dbe053141a220d3cf42b --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:775fad600b260c2b171389d001d94bc2a2f740f039fa4a5c80fd65ea884ced77 +size 25145344 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..5379ea574606a2e04d401a808e2c792089d541da --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd2902b558c62bbf79c84f59d9749a7c6be77e2ddd613659fab43350cacaff9 +size 25690112 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..11cb439af72c3a8b6bb206be8917ce5ac11c5218 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4254b2c94f53116df5177c87bd1df608efa86838e1bf2ce8d72c9eb8dcd1115a +size 51380224 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..a852723838617e53da7eaa39db36af4fe0706a1a --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d5dd24f90bdf65310ae92d8f6b3e3d778efecd50b9e3853a1c9796e1e036358 +size 25145344 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..37695e06476e976c3065fe5af037854cb1d0d9a5 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab66d2a73358f080f6e4fcff1739c878e009b9c5c3857e9e0038504333655c42 +size 25690112 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..773c355b0e64bf886c02d5ffea9b8d259879eb02 --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fb9f8c321ed4c6b796235f1eb23e19f5ee8f6663995cb6baa03ef929fa93717 +size 51380224 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea0bf9495845babb7468aec5bd62d5b30d33fb91 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:534ab60deeba98f6e35ca7c0dcd1be8e4d286b7bc4c24f4018ed9b2cfc078943 +size 25145344 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..36b3dd4774a9f60d63d57af198bfb72b94b17dc3 --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b7326ed7e2baa5e08b1d9644d59accab76fa7d8e7a91c68d67d225af1a12ff9 +size 25690112 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..f324cc127be19f91e16ee2b81192e6d76d24141a --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:494b65d2deb3a4be0971df503cfe56b71c8570293f7137df673bf685ef2c91e2 +size 51380224 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..8a366a5e0ae90150ec8eeacf7c9c8d6a51dd0329 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d66295925c8a22bd3a1906d77301838639b21e25f16ed8ca6ad4f44b5f31993 +size 25145344 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..c12dce8c82b095fc3c5799476976ca3935b7edef --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f30bc0fecbe2812f94c4deb2cdb3d246e5e0a125d357cc5054866d7b3a63a1be +size 51380224 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..81c931b9aca05bc389af57b56c235e2288fada07 --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b048328b4b63ea16ac873bf26814518bd7ecfdd7fcc1b4d429bac8184165fdbf +size 25690112 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..71df30b765789e835fc92e369b39d994fdb5ac74 --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9230fa785a8d98addff9e429dd70ecb4c5a04530e1f5bb1e7f7b923f21bc036d +size 51380224 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..737cb8c92c46e7d968094ea4ed70e4861329062b --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f692f8028adce0bfcc1dd6cccda5a75a21840eb4912529edd51149fbead4164 +size 25145344 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..33b152cda7e673c2f57ef5eed78623f96fb225bf --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2521bbd9a9903b5a5327f123fd44d85213491f167d011c745e844eb5064597d +size 22708224 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..f1063850943fd054c3d8ed147bd8fa9f1427b805 --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8a872b0df4248074c258cecac4345ee505702033f12e3ee1685aa62f799c7cc +size 25690112 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..11d38c3c4f3452a1bbf5c41ae79d66b08c6600be --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4db0e7fd5b73356a7467b270d9327c4a649d636e4bf7993b295879defa8c1e0 +size 25690112 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..1e41253751d7ff01b861cafd37c1a4958858356a --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a7de320a26434a26bb4e0babcf529d12bc8b37de4664f5f47a95f82b0e69d55 +size 51380224 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..da654cb38da38696c062d27c325f5a70afde4533 --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2a02d3d3bd83ee92376ab9d30152534b94460597fe139fe4e81a77415a3a651 +size 33546240 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..88408f7bed9a7f4eb7135984240817559ff039ef --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b36f523c83b88e7e063222fee55beb5825c39b3be0d484f65174554d4bb21bf +size 33266688 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..6795b96b73c4d6748e589be4e1ea0580a4516905 --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9439a5eb126dd9df57a6c4316108e04ed43d1d55ccf13d01b6fc1c677bc74e3e +size 51380224 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..5aedb414f97cfc6498787598db3d1136716b9508 --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ea214aa14cc846f427f02547a5cfb0c594c9e7d10323f6213d176a3f2558abe +size 25690112 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..fdc24bbdcbf2f99308fd7caf907c15510912fa46 --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bca59047fad5c46dd6c4d345a51db8fc2a2b396aa4586ba23d4c6fe12f2c35eb +size 25690112 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..8d43da56f057b5942b126d857647bee62793de56 --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c735fcc47628512c48edf49c0fba7a951fa59ce42e9cbfb896518e7d51ea1c5 +size 51380224 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..39806df76ee5e50e2caae035deccc7d81d19b1f8 --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aadd154beeaada136d5c686fe2d285c6ef52273e9d4c87c91196481dadca4323 +size 27575296 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..bf30fb060b04ab3c169158a23fc6e16049e3c0de --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ff54f14afb00617f5c421fc1794c69ebbfbb81ed0a5df84ba2651e3cfac4d96 +size 33266688 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..d249a0614971ba675e76102f05a756b432ff0c2b --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f61ce02d26a6d953d789ebf88fb10d739c4df64b3e80f23e76cbbd97ee69396 +size 51380224 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..3700fb7606d42a2aa41f17df4b8e261678e26779 --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52360758fdadfdba08a841622e5118d7efda122b2806e2b79ff33e3ec95f5a50 +size 25690112 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..75b4bc388de26af9d929e3a3243d4d37223d0bd5 --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:272c9d9d30b865452bfdb2a31b8ea858c24627fb2863bdc4f6e0ddd5e2d50e5f +size 51380224 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..a229a19653c0c4f86174f1ea4666843a3dd35ae7 --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee3d200cfbbc1f7545fb4dce9d07417a89a2c6e042f56cbcca266bf121b7f40e +size 27575296 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..79b748769b7f14894f1f3fe01518b755c07286f2 --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c65f8a82ab4a3b0d2da5bafcd9c9a51f73670b2e04ef030dba42200fc44c7567 +size 25690112 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..6f5c283d4e99fec260e6e3851e10f2b39e612c3e --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a75b12c3914347168ff4539d74f9d345aa73cbbe3b791cd8be1632dc514429e +size 51380224 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..f927e0a2784913f04b8865931e5e1b702df0a814 --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a598efb7b19d0a3e8203fdbd024769d5a34c76fd927f4ecc2b99a1d6c565e625 +size 51380224 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..f9f9e30b7fdba953ef434733c2d8ee25fd33b850 --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7ae39150b23c2d688bbbe06d6ae38282b831bc3952246b5968c01906a76370a +size 25145344 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..666eb58a476e271b89d8115585dadb0b0f856257 --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dd882d5d60232d8d4e36dffd27ed993035403c69b77b5fdce75702d29f593bb +size 25690112 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..a39b04efcf6f24ca98eccf1a5841ab1aa66d652e --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5ad42aa3064ecf6b5b357774badc383a66a85a64f4e4f49b8e926cd5abd489b +size 51380224 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..e37d0a174f75cd5c3a6317cfd80e8cae16af9fcd --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06cfa1da63a670c61356c1aa938dd64dd4e067a8c27d0441fc975cc866c2585f +size 25145344 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..62187f4fb026893cd367cb8a2dcba7ba7a553083 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fe1af90acef94efd99014f8e9cde8aa41cca6fcd276a2f2a4a7f5301c1e6e27 +size 25690112 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..06fb338a054bb3b45f1a2b49f5f0b6f41c861319 --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5a82c0277fb57da33907e3184688952890250fec3cac9d28667ff551e25e5fd +size 51380224 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..2992ad21cdc956baaa3e94c33ac9ded90ccf3609 --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3de31094d0c1dac46e344e41137686e20f0b1d70e2a2128f6fc4ff83807e82cd +size 25145344 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..a40907fdff84695ccb2cab40ae79ccc0f4063f9b --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e83982d7ee325eee0a117bde1853852c5a8db6a44bcec99eb62815649d2c43db +size 25690112 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..32d79366624e6cc286ac227868518c25974fe47a --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6ed229ab89c468ce2013eddc126b6f0029bc142f8429d05c90473f85510d891 +size 51380224 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..179af50a1d8cf0ed61f3fabfae4ac06c6aecb96d --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:778d73a65d73e0bad8166b9dd3082981ad662b60b86a4e5a5d527748c134f6a6 +size 25145344 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f281753a50af45b0262117b24bf0e8078d9e695 --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4618415d57652465465a9b4e9df79ebc0730c69ccf829c19c69329e5444ea359 +size 27575296 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..bfbecb2ff634ba25f49ef61afd3525a3c4de905a --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e9a6934f1f64c7abf87a6a7f9897a223555d825e80e19ca4f4c6f56c550c98c +size 25690112 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..c080eb85ce7febe7699133f40ac39ddd279300a6 --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa1f1c09951f243f8b68b619588b36722a52d376424c552676be8ea0d8604fe2 +size 51380224 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..a2e367338b42a5517bb52214c6b9b019a3096995 --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94a609696cba953a2b01367a70c9ae9c4b905938c83cc613ec440857570ac563 +size 25145344 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7f851ae4185ac2eb00d407e86b496902748c737 --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02196e66739e7cd8fc008ddb1552d064263de46d745072bde244492e9abeb88f +size 25690112 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..3abc2cb7086dd44ef51a106e03856b1e0f2b2be4 --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4f642a5562e672a5dd08b1ae5ccd92004f32f2f624dd539346e219e130d18e9 +size 51380224 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..04cbe258e73d96c2d4e35280dd62ee5cf745c8cf --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:352cd033682c9611800ec4834f32bb1fe66f714ce8db727128876792d47cd2e5 +size 25145344 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..6df1251cde66f42500ab776a2064ff0334257eda --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:957a517f6ea6c012de51d6d44de358da6cd5d773bfc31b8909ee711f9c231582 +size 25690112 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..7174af589adbe9d77534f93d83bd897862d94a7b --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6058a6b38c7c1745a68c198a06e39a4567db252122e1b5ee9604ffb5d92a061d +size 51380224 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..26fb77ada2e4cd883652d436548645ea4a08b542 --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3447b258670fb8d8b79ed1569ee78bd2df3690bec1ade6c4b6ca95285651dce +size 25145344 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..acd06aac4099de0789f6da93a6f120cc647701c2 --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a09c290a64b20f032a09d4b131ea17e91a45aee951a75984aa4cdbb22bfc7aac +size 25690112 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..10b26ae69f13ab11177e72396c599d24aa4add26 --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eee500e01622dc31f52d3e0903a82632ace700343b381728973a77d3747c5372 +size 25690112 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..d6bf025c70c597b6de72316330d982c8a04d37cd --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81b14ccdd2cfc3701d3ae54812c044dac2f1ec0f9d0b507ffa2c20c8c1a11a00 +size 51380224 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..e9666f7bf7bb94ff321a1a894758aee8c1b9c141 --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f483aa6e625e2595c31569bea95f79057e233eb2d98dfa3e5bd06f2982c99ec0 +size 25145344 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..e054a83f30dbc36e478a1fc5aa1589a48957143f --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13150eebad2757c0128c59a774837557cfec28c0c3954da2db9434a62fa44223 +size 25690112 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..b2e5656768934163ddf24ec8eb8c2fe09a4656ab --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b367aab6179f0cd1c5155ec8c8ce167782ce7f7a7b386e62fffb3b05b885b33 +size 51380224 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 0000000000000000000000000000000000000000..45611d38b0769e053533e92d413b0941d1c4c1a1 --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:705d2a0b4fed5a35fc78eef21f48b16df69d95da4b5e436264177671ebb6302c +size 25145344 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4178202983b26d7ccb283dbe16f60394d07fbbe --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8334d7d1e134e457829f52e23dd7df7980800314442f69decb51084f28ad284 +size 51380224 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..ec4e2df43b485ad00e5e87d254ceb07a0ca3e746 --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46eb85cca7c6b905050e2db7ada2f9cba3e784f4b84102448618ab6c708dfe70 +size 24313856 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..4de2b2d14a0ed47641f3a40ec37b1334079f913a --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac68a67b613673d55e779dc177fcb144f062726edf6e0d556eb8a93eb78005c7 +size 25690112 diff --git a/params_shard_98.bin b/params_shard_98.bin new file mode 100644 index 0000000000000000000000000000000000000000..805ea0445504fc2632e94f9dfdbe8d0fe34dd223 --- /dev/null +++ b/params_shard_98.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b465b4c661f99d693e0eabd971d6d183d274711c61e4e9f096c633e0b8c373e +size 25690112 diff --git a/params_shard_99.bin b/params_shard_99.bin new file mode 100644 index 0000000000000000000000000000000000000000..64601ad9ab8381a871727ea00c907afdd6d9e0d5 --- /dev/null +++ b/params_shard_99.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:931b033b0c17f7b68d3368046dd46b9cb3ca5f7115da4c6e0a3962748a877cc4 +size 51380224 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6121403a9d9cf762d977cd59364bd6740551a0ab --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2014 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +}