diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/mlc-chat-config.json b/mlc-chat-config.json new file mode 100644 index 0000000000000000000000000000000000000000..75de25ff88320f6a5f9571b3b219d76a1cecce18 --- /dev/null +++ b/mlc-chat-config.json @@ -0,0 +1,83 @@ +{ + "version": "0.1.0", + "model_type": "gemma2", + "quantization": "q4f32_1", + "model_config": { + "hidden_size": 4608, + "intermediate_size": 36864, + "attention_bias": false, + "num_attention_heads": 32, + "num_key_value_heads": 16, + "head_dim": 128, + "num_hidden_layers": 46, + "rms_norm_eps": 1e-06, + "vocab_size": 256000, + "hidden_activation": "gelu_pytorch_tanh", + "position_embedding_base": 10000.0, + "context_window_size": 4096, + "prefill_chunk_size": 2048, + "tensor_parallel_shards": 1, + "max_batch_size": 80, + "attn_logit_softcapping": 50.0, + "final_logit_softcapping": 30.0, + "query_pre_attn_scalar": 144, + "sliding_window": 4096 + }, + "vocab_size": 256000, + "context_window_size": 4096, + "sliding_window_size": -1, + "prefill_chunk_size": 2048, + "attention_sink_size": -1, + "tensor_parallel_shards": 1, + "temperature": 1.0, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "repetition_penalty": 1.0, + "top_p": 1.0, + "tokenizer_files": [ + "tokenizer.model", + "tokenizer.json", + "tokenizer_config.json" + ], + "tokenizer_info": { + "token_postproc_method": "byte_fallback", + "prepend_space_in_encode": false, + "strip_space_in_decode": false + }, + "conv_template": { + "name": "gemma_instruction", + "system_template": "{system_message}", + "system_message": "", + "system_prefix_token_ids": [ + 2 + ], + "add_role_after_system_message": true, + "roles": { + "user": "user", + "assistant": "model" + }, + "role_templates": { + "user": "{user_message}", + "assistant": "{assistant_message}", + "tool": "{tool_message}" + }, + "messages": [], + "seps": [ + "\n" + ], + "role_content_sep": "\n", + "role_empty_sep": "\n", + "stop_str": [ + "" + ], + "stop_token_ids": [ + 1, + 107 + ], + "function_string": "", + "use_function_calling": false + }, + "pad_token_id": 0, + "bos_token_id": 2, + "eos_token_id": 1 +} \ No newline at end of file diff --git a/ndarray-cache-b16.json b/ndarray-cache-b16.json new file mode 100644 index 0000000000000000000000000000000000000000..671635b44efa5d442ebb062eae572a4a6b5c378c --- /dev/null +++ b/ndarray-cache-b16.json @@ -0,0 +1,7657 @@ +{ + "metadata": { + "ParamSize": 555, + "ParamBytes": 17019832320.0, + "BitsPerParam": 4.59034047349219 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 589824000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 256000, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 589824000, + "byteOffset": 0 + } + ], + "md5sum": "33c0315e1fec4efbe2820fd1652b3907" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 73728000, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 256000, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 73728000, + "byteOffset": 0 + } + ], + "md5sum": "63ec10e87949fa56acd5798401501776" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "0c1cd6bfe199f79866d2d724743715b2" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "dbc571257f7b5d76c501bbb64e80e44c" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a9244717eaecf8ca6572d9f064c14f30" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 31887360, + "records": [ + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 9216 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 10626048 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31859712 + }, + { + "name": "model.layers.0.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31868928 + }, + { + "name": "model.layers.0.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31878144 + } + ], + "md5sum": "b3620e1fe86056b27723263a9e1f1f4b" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "83a7a0e3cfc5882557c90a7fa75fde19" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "a1e94ebd955ef9311b1e55cb34ac7f8f" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "d28bb21824551bc8ce83b15deeea610f" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "fc1e10138f503e77e5994049a618a987" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.1.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.1.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "125d51bd2bea98fcba3662f08f2f27cf" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 31850496, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 10616832 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 29491200 + } + ], + "md5sum": "9e0cd8818ed2d1f9b234893e0dd4903a" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "be551132e6ce6659c7c4651e5a32456e" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "c752a99e269bdeafae717c84ac2916df" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "ea4cce00773b56c04d9d6547c4dfe64b" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "e66624ba48784d866d64c02b62ef1f8c" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "05ea1d140d8823a2cd282e166214b407" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 31924224, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.10.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.10.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21270528 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 21279744 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31896576 + }, + { + "name": "model.layers.11.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31905792 + }, + { + "name": "model.layers.11.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31915008 + } + ], + "md5sum": "5b33441d7161a72475c6b86cb263a225" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "5af4aeb9c5c82fc9db406ca38737e4d0" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "f4bebd395193b56ae8113c0c8efcde4c" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "c5da438a9eb7ed33b6859e4a32121f34" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e440391d9c597de6ee9232d21faed545" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.12.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.12.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "308371e04ab12170dcaa59e97661ff76" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "770fbe7c4cc436fcac1d735099c8e394" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "974695a1d4ea81d548ac7547ea344f52" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "3dcf6811bbc321b97165a29fbd981981" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f0f49d462679eb0c55fc85dab9a6680c" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.13.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.13.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "3635e35c84bfa6b5f0c665546a9a76c4" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "1fede7938558fd9e2d5d063928f11509" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "b3215564782d6a49a6f8c0922e47df0a" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e9a0d20a0e90b5cccc20125107bc2579" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 1179648 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 1188864 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 11805696 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 33039360 + }, + { + "name": "model.layers.14.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 33048576 + }, + { + "name": "model.layers.14.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 33057792 + } + ], + "md5sum": "c895528a5e7a44dc89901071be2a5353" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 31850496, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 12976128 + } + ], + "md5sum": "046ea54972b6ef10814ea6a2bf51f773" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "282f916d4271bc4849cf84c64124706e" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "17b7287870ed22be2de780739d055bed" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 31850496, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 12976128 + } + ], + "md5sum": "f88d1a5192037463d5aafdd161339cfb" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "4868c9f373189931a3df71588d2873c5" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "9c882e86fa67ced8c515fa4d3df4a74c" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "aa6e2d9bc22a4ddd1cb41ed87a7ebc6f" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "dbb8db49f555a076087a294a5a56424a" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 23639040, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.6.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.6.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23629824 + } + ], + "md5sum": "a64ca3df72d305d246accbe8a50fd2c2" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "2672900bd06207d568034d3774ea270e" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "37501ff6c91f2d3297f605795fc552e8" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 31878144, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 10616832 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31850496 + }, + { + "name": "model.layers.7.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31859712 + }, + { + "name": "model.layers.7.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31868928 + } + ], + "md5sum": "34370fe6b9fa1d6a0bccd8a2350f6e13" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "cca45de3c8f5bc6084edbb3b2745c76f" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "240f55506b49f8f9225bc95c72d2f4e3" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "479861316a1012b680a3ce2ecaf39841" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "303bbea98b39981cb1b569f4a7bdbd22" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.8.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.8.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "055b841ef40ac2beaf03442d17f32ae9" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "259073fc5b03146a23a5645f3bf9b942" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "6b20344982979cc956e6e7e7d5625698" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "5ab2684bbd750443acddcb014018881b" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "ef7fd69a8028e5100af953d630b2b24c" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.9.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.9.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "66758291c25300504c1464788389343c" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "5fd2531dccf85e7b8a195bee66b6c485" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "77303c8c73c293c2ddcf7e13d684b971" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "b6315c2670f65a7058fa5d90ee389ace" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 33076224, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 1179648 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 1188864 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 11805696 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 33039360 + }, + { + "name": "model.layers.15.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 33048576 + }, + { + "name": "model.layers.15.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 33057792 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 33067008 + } + ], + "md5sum": "f3aa08e0c1f1a912e80250fd97a931b0" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "f876c3da70d3298d36478575d1e2dec6" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "9ac71de09dac7be0a550577834e54979" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 31878144, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 10616832 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31850496 + }, + { + "name": "model.layers.16.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31859712 + }, + { + "name": "model.layers.16.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31868928 + } + ], + "md5sum": "74bbba64eea2d25225a8e141b6dc8605" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "4fcb68f0197865e0da3bd5a5054677e2" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "913523891a68e0822e4c520f3126f08c" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "406663acd3b5cc9f8bda8f05b16dcb16" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "177f5142c9fc77b4524afffb8a770b86" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.17.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.17.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "f7b5ace8da156e3d4c79c29f9ab479f4" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "d9d669d64e38939b1226ab425eab2715" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "249aaf1ba823c93629adc65b5195eb4c" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "fb42cdf53d472fa6870768a90b2e3347" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "1b110b2736a0a13abe59f9be49b9c36e" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.18.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.18.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "64fc3d5498dd0585e224e3f787362e71" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "e97fe2f975256473e06716abac854a8a" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "583b058fa6ea63e17d5fedd262a20b53" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 24772608, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 1179648 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 22413312 + } + ], + "md5sum": "3e7fdcf31880becb794d83c5e45d0a9f" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "a9d432229c9f6fb00cb3b6e4825624fa" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "3ee71345500db399d249307246a0fa24" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "75e4bf1e0c6dd66625fea1fb8c953242" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "c951f58a71d96e62621b5c7ee515f5b8" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "fe96b2e16b5fc06d155fdf28d72654d8" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 31924224, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.19.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.19.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21270528 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 21279744 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31896576 + }, + { + "name": "model.layers.20.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31905792 + }, + { + "name": "model.layers.20.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31915008 + } + ], + "md5sum": "87f4ae324cf337d27ea8d44a4e91c254" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "febf39d31ce44d169f259985d0ffb0f4" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "3e28792bfb792e8114de7f748bfb2c0e" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "506707b235a017d2a2a45ff5a8b5e39e" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5bb034ed2695face6bb364ffff10e5c0" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.21.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.21.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "afd27488025f214a9c15d1b091ca5c50" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "496288931ea30a9cf7ebd267d72b2dca" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "7b64b614e8a62f125e6732033148ea14" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "540ca6bec424b394e3d318cc9055826a" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "82e7271bf09f06c6fa40b4bdd553e836" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.22.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.22.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "9bb328b72755182c91bd45f33bc50130" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "d087092e4ef5eabe5147390a9b3b6462" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "02ff70964d13ead402fe6dbf9c7ebd3f" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 24772608, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 1179648 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 22413312 + } + ], + "md5sum": "e133970c70cbdb36df95c7f3d752006b" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "095ee05477e52cfbf34ee9b3f3bd7cfd" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "10a450a231df77480565a7242ffac39f" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "4f70f54a6f7baca4023ed3b02d5755bd" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "95014f5aab38e82e86ac48f9c82e7ad8" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "e9ed3806ae3cf22ae2d7ed5fe7a9e804" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "bf478e0d209c89c64e25be8ae4f80a04" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "c472f4f970c7dd90f0248d6052126f67" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 31924224, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.2.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.2.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21270528 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 21279744 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31896576 + }, + { + "name": "model.layers.3.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31905792 + }, + { + "name": "model.layers.3.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31915008 + } + ], + "md5sum": "99c920e2a1a302a04e254c87d0dfd1ae" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "d52cee8b45ac58c0aa8d99d84dad3333" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "5a7fcd8c22f9253be9ada2f47c9024c1" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "171d1100056925129cbaad1c52c52c21" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "8748e1f671a913d8a01369463d638562" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.4.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.4.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "02b0429758f120e02f65c480cec4daed" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "901e9fb797c9f4372bed6c8cb00fbc22" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "808c3d2cba8849b9764fe200b1ba1fa2" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "73e1bb4dcc9782fb6dfd7227a7cc563d" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e4e0259ce7e9cfd993852b9d9628a818" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.5.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.5.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "9ae1eb95d80752c14921fcb65cfbd795" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "961e54055e38476f052b6fd954ff1d4e" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 33039360, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 1179648 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 20054016 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 22413312 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 31850496 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 33030144 + } + ], + "md5sum": "238e7be9e6ac7379e5c653142724c2b4" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "d6ff763d16a58a54eee64194caee10d0" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "610dc5522438e5b0aa30b34f746a27e1" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "9e38c282bff631724eb92a9ef7f2c350" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0e15a90f7c12500c13355dbb644ac899" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 33094656, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.23.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10626048 + }, + { + "name": "model.layers.23.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10635264 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10644480 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10653696 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21270528 + }, + { + "name": "model.layers.24.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21279744 + }, + { + "name": "model.layers.24.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21288960 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 21298176 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23657472 + } + ], + "md5sum": "23819cd608ee5129979413b51b005abd" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "0e89141a67103d9d5793af91c6837911" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "73503da1c0b4d8b2242eaf94ec4b3702" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "aa4b680996c670ec8e53cb7a514298af" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 1179648 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 1188864 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 11805696 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 33039360 + }, + { + "name": "model.layers.25.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 33048576 + }, + { + "name": "model.layers.25.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 33057792 + } + ], + "md5sum": "ca55a32a973349ede1b3b1314d531d7e" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "cbe8685e158c8dab4f6249ffb00efa85" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "bc44ff7a2bfa1714dc74a196f7ebd639" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "777bfdefe85addf0c05a4c3759dd691d" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "109d6383bb338dad6e958a5ad8ee70b8" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.26.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.26.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "d3f28645a8f230dd4cae8a4620a8fa5c" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "d42c33d17cb01e71d4855ff810e65e0d" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "d58f8a23a69fc3bb34cf153b429ee090" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "ca21137bca9135f3ac5e7f7927b22d35" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "7de3122cd2f5fec29f1490e957295041" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.27.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.27.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "f6d2bb970c72374810d61f2da4430000" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "518f4abdf1758c603f2465ff7ec078be" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 33039360, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 1179648 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 20054016 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 22413312 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 31850496 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 33030144 + } + ], + "md5sum": "cae7bb79a8b717a0920d19f294553df8" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "63cb1781b0fafaee32553dcc615b7ad9" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "1884fa696e8543badaff290871ddbe8e" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 31887360, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 10616832 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31850496 + }, + { + "name": "model.layers.28.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31859712 + }, + { + "name": "model.layers.28.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31868928 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31878144 + } + ], + "md5sum": "e5dd05da3c45754e665a0ec24a7fa3c8" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "b7fe7716d6a0578e8d6f9048b906adfe" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a9144483fefec9cf6ef724da3a1140ef" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 31878144, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 10616832 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31850496 + }, + { + "name": "model.layers.29.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31859712 + }, + { + "name": "model.layers.29.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31868928 + } + ], + "md5sum": "2f49c6b045ca4ed66f2e4201e749bac2" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "b845ea35c5c639536d57840fe3b7f1df" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "c4c4f0bfc7f67bfc4f442883f0ed15fa" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "c9d9e299c573de74e5108d2820adef04" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "01165a6c5042a86625961e0b92c0d30d" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.30.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.30.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "a8df28b1cd623302cefc50c203333247" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "4a62587afcabff67f436bed79ceb5e26" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "174fcba66796881cc6454cbe1313426d" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "8a9ce29c3fc016e4eaba8527ec399a0e" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "7c84ac0c41313658195f7ce7660e526b" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.31.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.31.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "bf0b2a5223ebc4dd31239dae554bfb2f" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "2c96d7c3244d1bf5431e62f5da92bc7c" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5e29d8c47fddfe64326a1d714f80dcfd" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 24772608, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 1179648 + }, + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 22413312 + } + ], + "md5sum": "4e80d7867681f840e562e6f9848974f5" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "c986ecb756c80d556e6e265e0f0386d2" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "f4f13231b6892d3226432baf55a22b9f" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "558e02ef3b7ffd894a310d9d0a208c70" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "7f3e75e6582ecca4e88d4ca4ad1a7f98" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "c48a644d67086abad3fb7e08a52a5648" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 31924224, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.32.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.32.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21270528 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 21279744 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31896576 + }, + { + "name": "model.layers.33.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31905792 + }, + { + "name": "model.layers.33.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31915008 + } + ], + "md5sum": "774fcc2846090fdb2c60e841eab32cb7" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "c86e9493fd420936083c92389c1ccadb" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "6a33b0f71dc4e28cf483c71c510b8253" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "b49548aeaee6482c737e32fa24f8e5f4" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "784e5276bfbc8678b08742c6c2b09555" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.34.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.34.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "956260c5a4a668eda36687c827d4751b" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "63e8db18c172b957558063d2d40cb8a6" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "8835673478ab5ef402526e3ca5e1ea80" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "7227f44954c82feb5f1c30eab3ef5988" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b6e20eeaafd47c5b19131181166b9797" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.35.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.35.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "52fe9e15bf3d2a3543c761ce0b7353a5" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "6d4b0552591774039788b74da87a0b46" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "7ab58ecce0c1c16ea1787a5fe90199ee" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 24772608, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 1179648 + }, + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 22413312 + } + ], + "md5sum": "87ee62065f4c18facdda8d72991ff222" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "602cfbeb7c28f25f7bf2de8e371820ff" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "88ecd578a7231aa44f612848eb179ec5" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "14921a6d973da8953908f184e72921f8" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "83e4278e69e2687bd8b28ba09189c3f7" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b8b7dab4db442af92171948037e396bb" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 31924224, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.36.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.36.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21270528 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 21279744 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31896576 + }, + { + "name": "model.layers.37.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31905792 + }, + { + "name": "model.layers.37.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31915008 + } + ], + "md5sum": "95fd0e45248c425f59f68cc53167e2b0" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "bd224b6576e2ddb003387509a47dd784" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "a4a42ceaf2701638c47d7f1b777b04d9" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "b151ac545a8c891b6ae72678b5296882" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4b4ea490cb1d1fb31c8e32fbeb3b7814" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.38.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.38.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "9cec83703f8794626f0426c62ef0b8e9" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "82005d021d728d1bccbb1fda07b02c24" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "4a2b98a7cd6875d8342113e649122950" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "9c3157b109e1dcc63728cafded4b62ac" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e0c606a2d4758f66b0009d2d73b54fd7" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.39.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.39.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "58b3ae81d1f698b4d51c445dcce994d0" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "014c109e560922c179b7230a27a93982" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "f9194bc35b26f91066199bdbe7a3fdb9" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "82cd8a35c345c39a9230b31e9016ea17" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 1179648 + }, + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 1188864 + }, + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 11805696 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 33039360 + }, + { + "name": "model.layers.40.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 33048576 + }, + { + "name": "model.layers.40.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 33057792 + } + ], + "md5sum": "27e024fc3612bedc4bdc7a05f66e053b" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 31850496, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.41.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 12976128 + } + ], + "md5sum": "76d462e3e746504923bb925881fdd9ed" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "cc264a409a6a4f1d444f17c7826d0194" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "e0000bf3f54b0f2620e37f0fe409dd1d" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "643407faa98af54bdc676a25e08c24ab" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "c60b5fff12d9ac252317c0f10706ebbc" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 23639040, + "records": [ + { + "name": "model.layers.41.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.41.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.41.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23629824 + } + ], + "md5sum": "992a45bd7c3e0b2529b4a9a2f26a8eba" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "46cb8b08fbff7aadac1e87921b093cd9" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.42.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "1044ee9c2271ff7e8121b548fd1b1e5a" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 31878144, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 10616832 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31850496 + }, + { + "name": "model.layers.42.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31859712 + }, + { + "name": "model.layers.42.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 31868928 + } + ], + "md5sum": "f0eeca89fc0fd136681d954ba6db8c67" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "04f087ee058228343099d5cf45a64685" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "63b3a8018f48fd3a7c9d26735abf4019" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "9a03582d04c436063327bd6b7bce31af" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "72b32e484f9828ead9f8d7d5e615a646" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.42.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.43.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.43.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.43.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "1290a794873a2717913de36d5d424977" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "a9e0b26c2a33481695ca153a6badf9a1" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "711e5cceadd6d83ef54a6f56bc4fb905" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "ea0cb8ae38045d4b24892cd0ae4bd0d4" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.44.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e1a9a2ae3c55ec14b06fa33994aec295" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.44.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.44.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.44.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "595aaf1de88435e412d90f66f60efce7" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "6a5dcdc29c3b23928d082f7e259b298d" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.45.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "bb8cd376a4366205bde2900caad9acee" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 24772608, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 21233664, + "byteOffset": 1179648 + }, + { + "name": "model.layers.45.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2359296, + "byteOffset": 22413312 + } + ], + "md5sum": "780d6e42db0268e5fb5d050a36b3c7be" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "47e60cdc3ed8692f64dace821d032893" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 21279744, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.45.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.45.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.norm.weight", + "shape": [ + 4608 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 9216, + "byteOffset": 21270528 + } + ], + "md5sum": "7ff27e2810c0fc95f423b15dbec78bea" + } + ] +} \ No newline at end of file diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..5464671092332a290c17782b5c5614da8c5c2697 --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,7657 @@ +{ + "metadata": { + "ParamSize": 555, + "ParamBytes": 17019832320.0, + "BitsPerParam": 4.59034047349219 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 589824000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 256000, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 589824000, + "byteOffset": 0 + } + ], + "md5sum": "33c0315e1fec4efbe2820fd1652b3907" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 73728000, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 256000, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 73728000, + "byteOffset": 0 + } + ], + "md5sum": "63ec10e87949fa56acd5798401501776" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "0c1cd6bfe199f79866d2d724743715b2" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "dbc571257f7b5d76c501bbb64e80e44c" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a9244717eaecf8ca6572d9f064c14f30" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 31887360, + "records": [ + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 9216 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 10626048 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31859712 + }, + { + "name": "model.layers.0.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31868928 + }, + { + "name": "model.layers.0.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31878144 + } + ], + "md5sum": "b3620e1fe86056b27723263a9e1f1f4b" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "83a7a0e3cfc5882557c90a7fa75fde19" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "a1e94ebd955ef9311b1e55cb34ac7f8f" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "d28bb21824551bc8ce83b15deeea610f" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "fc1e10138f503e77e5994049a618a987" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.1.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.1.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "125d51bd2bea98fcba3662f08f2f27cf" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 31850496, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 10616832 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 29491200 + } + ], + "md5sum": "9e0cd8818ed2d1f9b234893e0dd4903a" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "be551132e6ce6659c7c4651e5a32456e" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "c752a99e269bdeafae717c84ac2916df" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "ea4cce00773b56c04d9d6547c4dfe64b" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "e66624ba48784d866d64c02b62ef1f8c" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "05ea1d140d8823a2cd282e166214b407" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 31924224, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.10.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.10.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21270528 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 21279744 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31896576 + }, + { + "name": "model.layers.11.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31905792 + }, + { + "name": "model.layers.11.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31915008 + } + ], + "md5sum": "5b33441d7161a72475c6b86cb263a225" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "5af4aeb9c5c82fc9db406ca38737e4d0" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "f4bebd395193b56ae8113c0c8efcde4c" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "c5da438a9eb7ed33b6859e4a32121f34" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e440391d9c597de6ee9232d21faed545" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.12.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.12.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "308371e04ab12170dcaa59e97661ff76" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "770fbe7c4cc436fcac1d735099c8e394" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "974695a1d4ea81d548ac7547ea344f52" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "3dcf6811bbc321b97165a29fbd981981" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f0f49d462679eb0c55fc85dab9a6680c" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.13.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.13.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "3635e35c84bfa6b5f0c665546a9a76c4" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "1fede7938558fd9e2d5d063928f11509" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "b3215564782d6a49a6f8c0922e47df0a" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e9a0d20a0e90b5cccc20125107bc2579" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1179648 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 1188864 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11805696 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33039360 + }, + { + "name": "model.layers.14.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33048576 + }, + { + "name": "model.layers.14.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33057792 + } + ], + "md5sum": "c895528a5e7a44dc89901071be2a5353" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 31850496, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 12976128 + } + ], + "md5sum": "046ea54972b6ef10814ea6a2bf51f773" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "282f916d4271bc4849cf84c64124706e" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "17b7287870ed22be2de780739d055bed" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 31850496, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 12976128 + } + ], + "md5sum": "f88d1a5192037463d5aafdd161339cfb" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "4868c9f373189931a3df71588d2873c5" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "9c882e86fa67ced8c515fa4d3df4a74c" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "aa6e2d9bc22a4ddd1cb41ed87a7ebc6f" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "dbb8db49f555a076087a294a5a56424a" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 23639040, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.6.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.6.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23629824 + } + ], + "md5sum": "a64ca3df72d305d246accbe8a50fd2c2" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "2672900bd06207d568034d3774ea270e" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "37501ff6c91f2d3297f605795fc552e8" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 31878144, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 10616832 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31850496 + }, + { + "name": "model.layers.7.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31859712 + }, + { + "name": "model.layers.7.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31868928 + } + ], + "md5sum": "34370fe6b9fa1d6a0bccd8a2350f6e13" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "cca45de3c8f5bc6084edbb3b2745c76f" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "240f55506b49f8f9225bc95c72d2f4e3" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "479861316a1012b680a3ce2ecaf39841" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "303bbea98b39981cb1b569f4a7bdbd22" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.8.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.8.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "055b841ef40ac2beaf03442d17f32ae9" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "259073fc5b03146a23a5645f3bf9b942" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "6b20344982979cc956e6e7e7d5625698" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "5ab2684bbd750443acddcb014018881b" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "ef7fd69a8028e5100af953d630b2b24c" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.9.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.9.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "66758291c25300504c1464788389343c" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "5fd2531dccf85e7b8a195bee66b6c485" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "77303c8c73c293c2ddcf7e13d684b971" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "b6315c2670f65a7058fa5d90ee389ace" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 33076224, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1179648 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 1188864 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11805696 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33039360 + }, + { + "name": "model.layers.15.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33048576 + }, + { + "name": "model.layers.15.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33057792 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33067008 + } + ], + "md5sum": "f3aa08e0c1f1a912e80250fd97a931b0" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "f876c3da70d3298d36478575d1e2dec6" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "9ac71de09dac7be0a550577834e54979" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 31878144, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 10616832 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31850496 + }, + { + "name": "model.layers.16.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31859712 + }, + { + "name": "model.layers.16.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31868928 + } + ], + "md5sum": "74bbba64eea2d25225a8e141b6dc8605" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "4fcb68f0197865e0da3bd5a5054677e2" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "913523891a68e0822e4c520f3126f08c" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "406663acd3b5cc9f8bda8f05b16dcb16" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "177f5142c9fc77b4524afffb8a770b86" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.17.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.17.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "f7b5ace8da156e3d4c79c29f9ab479f4" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "d9d669d64e38939b1226ab425eab2715" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "249aaf1ba823c93629adc65b5195eb4c" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "fb42cdf53d472fa6870768a90b2e3347" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "1b110b2736a0a13abe59f9be49b9c36e" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.18.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.18.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "64fc3d5498dd0585e224e3f787362e71" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "e97fe2f975256473e06716abac854a8a" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "583b058fa6ea63e17d5fedd262a20b53" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 24772608, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 1179648 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22413312 + } + ], + "md5sum": "3e7fdcf31880becb794d83c5e45d0a9f" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "a9d432229c9f6fb00cb3b6e4825624fa" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "3ee71345500db399d249307246a0fa24" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "75e4bf1e0c6dd66625fea1fb8c953242" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "c951f58a71d96e62621b5c7ee515f5b8" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "fe96b2e16b5fc06d155fdf28d72654d8" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 31924224, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.19.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.19.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21270528 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 21279744 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31896576 + }, + { + "name": "model.layers.20.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31905792 + }, + { + "name": "model.layers.20.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31915008 + } + ], + "md5sum": "87f4ae324cf337d27ea8d44a4e91c254" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "febf39d31ce44d169f259985d0ffb0f4" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "3e28792bfb792e8114de7f748bfb2c0e" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "506707b235a017d2a2a45ff5a8b5e39e" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5bb034ed2695face6bb364ffff10e5c0" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.21.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.21.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "afd27488025f214a9c15d1b091ca5c50" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "496288931ea30a9cf7ebd267d72b2dca" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "7b64b614e8a62f125e6732033148ea14" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "540ca6bec424b394e3d318cc9055826a" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "82e7271bf09f06c6fa40b4bdd553e836" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.22.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.22.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "9bb328b72755182c91bd45f33bc50130" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "d087092e4ef5eabe5147390a9b3b6462" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "02ff70964d13ead402fe6dbf9c7ebd3f" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 24772608, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 1179648 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22413312 + } + ], + "md5sum": "e133970c70cbdb36df95c7f3d752006b" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "095ee05477e52cfbf34ee9b3f3bd7cfd" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "10a450a231df77480565a7242ffac39f" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "4f70f54a6f7baca4023ed3b02d5755bd" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "95014f5aab38e82e86ac48f9c82e7ad8" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "e9ed3806ae3cf22ae2d7ed5fe7a9e804" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "bf478e0d209c89c64e25be8ae4f80a04" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "c472f4f970c7dd90f0248d6052126f67" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 31924224, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.2.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.2.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21270528 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 21279744 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31896576 + }, + { + "name": "model.layers.3.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31905792 + }, + { + "name": "model.layers.3.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31915008 + } + ], + "md5sum": "99c920e2a1a302a04e254c87d0dfd1ae" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "d52cee8b45ac58c0aa8d99d84dad3333" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "5a7fcd8c22f9253be9ada2f47c9024c1" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "171d1100056925129cbaad1c52c52c21" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "8748e1f671a913d8a01369463d638562" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.4.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.4.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "02b0429758f120e02f65c480cec4daed" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "901e9fb797c9f4372bed6c8cb00fbc22" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "808c3d2cba8849b9764fe200b1ba1fa2" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "73e1bb4dcc9782fb6dfd7227a7cc563d" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e4e0259ce7e9cfd993852b9d9628a818" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.5.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.5.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "9ae1eb95d80752c14921fcb65cfbd795" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "961e54055e38476f052b6fd954ff1d4e" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 33039360, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 1179648 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 20054016 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 22413312 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 31850496 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33030144 + } + ], + "md5sum": "238e7be9e6ac7379e5c653142724c2b4" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "d6ff763d16a58a54eee64194caee10d0" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "610dc5522438e5b0aa30b34f746a27e1" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "9e38c282bff631724eb92a9ef7f2c350" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0e15a90f7c12500c13355dbb644ac899" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 33094656, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.23.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10626048 + }, + { + "name": "model.layers.23.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10635264 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10644480 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10653696 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21270528 + }, + { + "name": "model.layers.24.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21279744 + }, + { + "name": "model.layers.24.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21288960 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21298176 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23657472 + } + ], + "md5sum": "23819cd608ee5129979413b51b005abd" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "0e89141a67103d9d5793af91c6837911" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "73503da1c0b4d8b2242eaf94ec4b3702" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "aa4b680996c670ec8e53cb7a514298af" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1179648 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 1188864 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11805696 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33039360 + }, + { + "name": "model.layers.25.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33048576 + }, + { + "name": "model.layers.25.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33057792 + } + ], + "md5sum": "ca55a32a973349ede1b3b1314d531d7e" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "cbe8685e158c8dab4f6249ffb00efa85" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "bc44ff7a2bfa1714dc74a196f7ebd639" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "777bfdefe85addf0c05a4c3759dd691d" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "109d6383bb338dad6e958a5ad8ee70b8" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.26.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.26.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "d3f28645a8f230dd4cae8a4620a8fa5c" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "d42c33d17cb01e71d4855ff810e65e0d" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "d58f8a23a69fc3bb34cf153b429ee090" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "ca21137bca9135f3ac5e7f7927b22d35" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "7de3122cd2f5fec29f1490e957295041" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.27.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.27.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "f6d2bb970c72374810d61f2da4430000" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "518f4abdf1758c603f2465ff7ec078be" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 33039360, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 1179648 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 20054016 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 22413312 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 31850496 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33030144 + } + ], + "md5sum": "cae7bb79a8b717a0920d19f294553df8" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "63cb1781b0fafaee32553dcc615b7ad9" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "1884fa696e8543badaff290871ddbe8e" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 31887360, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 10616832 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31850496 + }, + { + "name": "model.layers.28.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31859712 + }, + { + "name": "model.layers.28.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31868928 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31878144 + } + ], + "md5sum": "e5dd05da3c45754e665a0ec24a7fa3c8" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "b7fe7716d6a0578e8d6f9048b906adfe" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a9144483fefec9cf6ef724da3a1140ef" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 31878144, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 10616832 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31850496 + }, + { + "name": "model.layers.29.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31859712 + }, + { + "name": "model.layers.29.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31868928 + } + ], + "md5sum": "2f49c6b045ca4ed66f2e4201e749bac2" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "b845ea35c5c639536d57840fe3b7f1df" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "c4c4f0bfc7f67bfc4f442883f0ed15fa" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "c9d9e299c573de74e5108d2820adef04" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "01165a6c5042a86625961e0b92c0d30d" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.30.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.30.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "a8df28b1cd623302cefc50c203333247" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "4a62587afcabff67f436bed79ceb5e26" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "174fcba66796881cc6454cbe1313426d" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "8a9ce29c3fc016e4eaba8527ec399a0e" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "7c84ac0c41313658195f7ce7660e526b" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.31.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.31.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "bf0b2a5223ebc4dd31239dae554bfb2f" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "2c96d7c3244d1bf5431e62f5da92bc7c" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5e29d8c47fddfe64326a1d714f80dcfd" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 24772608, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 1179648 + }, + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22413312 + } + ], + "md5sum": "4e80d7867681f840e562e6f9848974f5" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "c986ecb756c80d556e6e265e0f0386d2" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "f4f13231b6892d3226432baf55a22b9f" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "558e02ef3b7ffd894a310d9d0a208c70" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "7f3e75e6582ecca4e88d4ca4ad1a7f98" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "c48a644d67086abad3fb7e08a52a5648" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 31924224, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.32.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.32.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21270528 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 21279744 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31896576 + }, + { + "name": "model.layers.33.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31905792 + }, + { + "name": "model.layers.33.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31915008 + } + ], + "md5sum": "774fcc2846090fdb2c60e841eab32cb7" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "c86e9493fd420936083c92389c1ccadb" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "6a33b0f71dc4e28cf483c71c510b8253" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "b49548aeaee6482c737e32fa24f8e5f4" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "784e5276bfbc8678b08742c6c2b09555" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.34.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.34.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "956260c5a4a668eda36687c827d4751b" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "63e8db18c172b957558063d2d40cb8a6" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "8835673478ab5ef402526e3ca5e1ea80" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "7227f44954c82feb5f1c30eab3ef5988" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b6e20eeaafd47c5b19131181166b9797" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.35.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.35.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "52fe9e15bf3d2a3543c761ce0b7353a5" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "6d4b0552591774039788b74da87a0b46" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "7ab58ecce0c1c16ea1787a5fe90199ee" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 24772608, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 1179648 + }, + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22413312 + } + ], + "md5sum": "87ee62065f4c18facdda8d72991ff222" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "602cfbeb7c28f25f7bf2de8e371820ff" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "88ecd578a7231aa44f612848eb179ec5" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "14921a6d973da8953908f184e72921f8" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "83e4278e69e2687bd8b28ba09189c3f7" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b8b7dab4db442af92171948037e396bb" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 31924224, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.36.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.36.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21270528 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 21279744 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31896576 + }, + { + "name": "model.layers.37.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31905792 + }, + { + "name": "model.layers.37.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31915008 + } + ], + "md5sum": "95fd0e45248c425f59f68cc53167e2b0" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "bd224b6576e2ddb003387509a47dd784" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "a4a42ceaf2701638c47d7f1b777b04d9" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "b151ac545a8c891b6ae72678b5296882" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4b4ea490cb1d1fb31c8e32fbeb3b7814" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.38.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.38.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "9cec83703f8794626f0426c62ef0b8e9" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "82005d021d728d1bccbb1fda07b02c24" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "4a2b98a7cd6875d8342113e649122950" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "9c3157b109e1dcc63728cafded4b62ac" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e0c606a2d4758f66b0009d2d73b54fd7" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.39.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.39.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "58b3ae81d1f698b4d51c445dcce994d0" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "014c109e560922c179b7230a27a93982" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "f9194bc35b26f91066199bdbe7a3fdb9" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "82cd8a35c345c39a9230b31e9016ea17" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1179648 + }, + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 1188864 + }, + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 11805696 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33039360 + }, + { + "name": "model.layers.40.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33048576 + }, + { + "name": "model.layers.40.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 33057792 + } + ], + "md5sum": "27e024fc3612bedc4bdc7a05f66e053b" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 31850496, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.41.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 12976128 + } + ], + "md5sum": "76d462e3e746504923bb925881fdd9ed" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "cc264a409a6a4f1d444f17c7826d0194" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "e0000bf3f54b0f2620e37f0fe409dd1d" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "643407faa98af54bdc676a25e08c24ab" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "c60b5fff12d9ac252317c0f10706ebbc" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 23639040, + "records": [ + { + "name": "model.layers.41.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.41.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.41.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23629824 + } + ], + "md5sum": "992a45bd7c3e0b2529b4a9a2f26a8eba" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "46cb8b08fbff7aadac1e87921b093cd9" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.42.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "1044ee9c2271ff7e8121b548fd1b1e5a" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 31878144, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 0 + }, + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 10616832 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31850496 + }, + { + "name": "model.layers.42.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31859712 + }, + { + "name": "model.layers.42.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 31868928 + } + ], + "md5sum": "f0eeca89fc0fd136681d954ba6db8c67" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "04f087ee058228343099d5cf45a64685" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "63b3a8018f48fd3a7c9d26735abf4019" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "9a03582d04c436063327bd6b7bce31af" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "72b32e484f9828ead9f8d7d5e615a646" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 25989120, + "records": [ + { + "name": "model.layers.42.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2359296 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 11796480 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 12976128 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 12985344 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23602176 + }, + { + "name": "model.layers.43.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23611392 + }, + { + "name": "model.layers.43.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 23620608 + }, + { + "name": "model.layers.43.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23629824 + } + ], + "md5sum": "1290a794873a2717913de36d5d424977" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "a9e0b26c2a33481695ca153a6badf9a1" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "711e5cceadd6d83ef54a6f56bc4fb905" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 21233664, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 0 + } + ], + "md5sum": "ea0cb8ae38045d4b24892cd0ae4bd0d4" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.44.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e1a9a2ae3c55ec14b06fa33994aec295" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 33067008, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.44.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.44.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.layers.44.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21270528 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23629824 + } + ], + "md5sum": "595aaf1de88435e412d90f66f60efce7" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 169869312, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 73728, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 169869312, + "byteOffset": 0 + } + ], + "md5sum": "6a5dcdc29c3b23928d082f7e259b298d" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.45.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 576 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "bb8cd376a4366205bde2900caad9acee" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 24772608, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 0 + }, + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 73728, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 21233664, + "byteOffset": 1179648 + }, + { + "name": "model.layers.45.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22413312 + } + ], + "md5sum": "780d6e42db0268e5fb5d050a36b3c7be" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 84934656, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 4608, + 4608 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 84934656, + "byteOffset": 0 + } + ], + "md5sum": "47e60cdc3ed8692f64dace821d032893" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 21279744, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 4608, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 4608, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1179648, + "byteOffset": 9437184 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 10616832 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 4608, + 1152 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10616832, + "byteOffset": 10626048 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21242880 + }, + { + "name": "model.layers.45.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21252096 + }, + { + "name": "model.layers.45.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21261312 + }, + { + "name": "model.norm.weight", + "shape": [ + 4608 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 21270528 + } + ], + "md5sum": "7ff27e2810c0fc95f423b15dbec78bea" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b3b5c8e1c1e0158c136369ea7ed28b3766e3413 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88ba5b3f5497ccfe0b22d6ad859d45a9dee692740cc60841039520cbae4861b0 +size 589824000 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..912d3d1c19d97b2dbb6d67dbea47b0722ad446ff --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a81dbd770fa4949994f9013bb9ffe49482a390ab1317cbd84129ce186693491a +size 73728000 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..8e19ee5eddffe7f2465a7df52a00ce24a28b6a2d --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8cb8fc89f471905d2c5f91f0b203ff12da01d5971618bf13b5815911d857f15 +size 25989120 diff --git a/params_shard_100.bin b/params_shard_100.bin new file mode 100644 index 0000000000000000000000000000000000000000..a7e56f588803f81ae20a6da18a762fca0b6cab9f --- /dev/null +++ b/params_shard_100.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01d98a54c97db67a88c5616af9f915512f2514bf1dd69f81ef3cf0d108496c27 +size 31924224 diff --git a/params_shard_101.bin b/params_shard_101.bin new file mode 100644 index 0000000000000000000000000000000000000000..da31e90e8c9d0b11ad47cc3f2587e031a47376cc --- /dev/null +++ b/params_shard_101.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11ee833f6e5b11da2cb0090e824805186288e2e40d951a5e6e5382de50620015 +size 84934656 diff --git a/params_shard_102.bin b/params_shard_102.bin new file mode 100644 index 0000000000000000000000000000000000000000..26ee962e339bb211d0fda3bbee590c8a8674ed48 --- /dev/null +++ b/params_shard_102.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7273c4bb91f92f260eb22b6351dbde7a92e76b662bc81addf0f2ce156daefee5 +size 169869312 diff --git a/params_shard_103.bin b/params_shard_103.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb5bba6b970c70b718b71c2d6993b597d0e4a1d2 --- /dev/null +++ b/params_shard_103.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beaa98086cdcb0dc66d40320c7d2882384a7d154f2c4dd4e024a020c2352fb51 +size 21233664 diff --git a/params_shard_104.bin b/params_shard_104.bin new file mode 100644 index 0000000000000000000000000000000000000000..ad724f05e62dabdb5afbad6741c11477e3beaccb --- /dev/null +++ b/params_shard_104.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:927112c1a90b737a621a79931f85d4953cc3370ffd987c9fca0bffae66cd51a1 +size 18874368 diff --git a/params_shard_105.bin b/params_shard_105.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b4fc90aa167782923a8ea91e26a5ca94c8e764d --- /dev/null +++ b/params_shard_105.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e34f3d5b530927e8cd62bd3826499642f4c2d7e37145f6f0c3be3332fa3e82d +size 25989120 diff --git a/params_shard_106.bin b/params_shard_106.bin new file mode 100644 index 0000000000000000000000000000000000000000..52f3b34341ef51180624541bc84b2bfaf0fae3a2 --- /dev/null +++ b/params_shard_106.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4d9faaf23689992f4404fa0348f8cffb5d62f7bfb99a0fe6669a40cb50af3e1 +size 84934656 diff --git a/params_shard_107.bin b/params_shard_107.bin new file mode 100644 index 0000000000000000000000000000000000000000..d76b5459c8714f28d79f7589ea12d900bb953b15 --- /dev/null +++ b/params_shard_107.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eb5f6ebb49b10f729b6d19318d54e00dbb3e98e8ce7e5791ede4b2d7ed2ba93 +size 169869312 diff --git a/params_shard_108.bin b/params_shard_108.bin new file mode 100644 index 0000000000000000000000000000000000000000..6529c6447672f527f6599509ed98684f0a56981a --- /dev/null +++ b/params_shard_108.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1d63587f07f527b3a7587d0d876e162ab0de72cc161f2449ff75013565b7ec9 +size 21233664 diff --git a/params_shard_109.bin b/params_shard_109.bin new file mode 100644 index 0000000000000000000000000000000000000000..ce864df593feb695cd30b0de2eed69373102d29a --- /dev/null +++ b/params_shard_109.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77ca5cbe8d9b534c994948bc70fdcd248935db5cf4028ba945820e8b97a49a97 +size 18874368 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..af0fa8f19f87011e4d143607d79fbe827786381e --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b45cb46ad952a6b34f385b2935bf891460b730cb261b7015778c2a31335227dc +size 31850496 diff --git a/params_shard_110.bin b/params_shard_110.bin new file mode 100644 index 0000000000000000000000000000000000000000..5566bfbc75b0b42e49f9ecd1bfdb0aa1a04fefe1 --- /dev/null +++ b/params_shard_110.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:482e48a3077de86e6e3ca511a1b7b7e22ebe66eb87a3d54f071e86b6a99d06ea +size 33067008 diff --git a/params_shard_111.bin b/params_shard_111.bin new file mode 100644 index 0000000000000000000000000000000000000000..853300e5a1b9a075eaaa3d282e2d7b4fd3bd2209 --- /dev/null +++ b/params_shard_111.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:889258fa192789016dde1b320b1bc3f2954e91cd89a098336e75546837f927a8 +size 84934656 diff --git a/params_shard_112.bin b/params_shard_112.bin new file mode 100644 index 0000000000000000000000000000000000000000..47086e4add6d258c43fd7dd572f8634c97b51768 --- /dev/null +++ b/params_shard_112.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f00c9824d54a4a80b41d3f4f9a2a80e2801fabee1c4638d09af2c7951d42a5fa +size 33039360 diff --git a/params_shard_113.bin b/params_shard_113.bin new file mode 100644 index 0000000000000000000000000000000000000000..6dd00d31bf3778d0b3499617b5ebdb7dfbba97dd --- /dev/null +++ b/params_shard_113.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ccd2a002d50be13e5d03dad9997831d64fdc4bdb23c2f5afa70abb981c58963 +size 84934656 diff --git a/params_shard_114.bin b/params_shard_114.bin new file mode 100644 index 0000000000000000000000000000000000000000..b5d85ae125ebe5dc3667b99c42f7eba27437ee99 --- /dev/null +++ b/params_shard_114.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:609328b523a629ff353de7424ed1b5905210a6a7c73f18ae3faa3e57ba1ef8ca +size 169869312 diff --git a/params_shard_115.bin b/params_shard_115.bin new file mode 100644 index 0000000000000000000000000000000000000000..d2eacc48bacae8435d4e754c523a7fb0e038a70d --- /dev/null +++ b/params_shard_115.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe8dd351613f592b4ef8522985bdbb14c22ac433930119ed356b1e8706c9d67c +size 21233664 diff --git a/params_shard_116.bin b/params_shard_116.bin new file mode 100644 index 0000000000000000000000000000000000000000..182abc0c4c15a3be0fbf0d19c3e5d819f450aee6 --- /dev/null +++ b/params_shard_116.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:512e5f39b3e926d005bab637ab471bfa3e6f7eaa42aa6aac28bb285d5432ac58 +size 18874368 diff --git a/params_shard_117.bin b/params_shard_117.bin new file mode 100644 index 0000000000000000000000000000000000000000..ff57f61f4485b9ad2236092296954ee785bf6e21 --- /dev/null +++ b/params_shard_117.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce703cbe22eb917addd1b2c789c857237f56168edd7de1196f1189a3fe8bb51a +size 33094656 diff --git a/params_shard_118.bin b/params_shard_118.bin new file mode 100644 index 0000000000000000000000000000000000000000..964a7a8a2ba9d7cfd26d334061749103952fe522 --- /dev/null +++ b/params_shard_118.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b21f563bb7d7abe56ffaed3165a6359bcd8bc3ba705b8d0472e39b637930f51 +size 84934656 diff --git a/params_shard_119.bin b/params_shard_119.bin new file mode 100644 index 0000000000000000000000000000000000000000..0aeb65a7dc5ff1c2b1e27de3b3a05c41720c9684 --- /dev/null +++ b/params_shard_119.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56232e123eefbe5ca697a8e9dffc36549e21a2b923c4c5d27d33329b1695d629 +size 169869312 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..6cd5311277105ab1b9c9c67f9b2af18e07f0d693 --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df4515d65f7798113e5b13a846085a6a2f69b56ba1d8dba45c9b495091fd6867 +size 84934656 diff --git a/params_shard_120.bin b/params_shard_120.bin new file mode 100644 index 0000000000000000000000000000000000000000..2be5d9af92ff15a6f4e337c1e67fe68da9096316 --- /dev/null +++ b/params_shard_120.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7570d783519b583ab488d51556f004703c97e7c19c659a41f92ec243089e5462 +size 18874368 diff --git a/params_shard_121.bin b/params_shard_121.bin new file mode 100644 index 0000000000000000000000000000000000000000..b3ad682b7aa1e526a7c3867fb0fb94855d482878 --- /dev/null +++ b/params_shard_121.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5340934dd3abf76e8a72397eedf80cf4b09b6d8e25f64fa952745ec2ed32853 +size 33067008 diff --git a/params_shard_122.bin b/params_shard_122.bin new file mode 100644 index 0000000000000000000000000000000000000000..e34ca0b5b0a36efba02ec3ded3bba1df3ff75b59 --- /dev/null +++ b/params_shard_122.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aa3e2358376700ac3d36aceb447b778bbed3459c664707b784ff1b8f396c2f5 +size 84934656 diff --git a/params_shard_123.bin b/params_shard_123.bin new file mode 100644 index 0000000000000000000000000000000000000000..6148becc71185d6992d18b5e38cc5c61498ab0fe --- /dev/null +++ b/params_shard_123.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d4a164a79f1bfbce13748270e8b1eee9689b52c39aeb261fb973cccb99fd9a +size 169869312 diff --git a/params_shard_124.bin b/params_shard_124.bin new file mode 100644 index 0000000000000000000000000000000000000000..d6d3490af897d66adb9308e32fa65df150e72de1 --- /dev/null +++ b/params_shard_124.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:174e8cf2e9dd43ef1bf0ece984fb22bf259fa9d7705a80635412bea60d9fa528 +size 21233664 diff --git a/params_shard_125.bin b/params_shard_125.bin new file mode 100644 index 0000000000000000000000000000000000000000..6f54ddbfd7a0202ccacb235147b9c2d773807c4c --- /dev/null +++ b/params_shard_125.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a704a630773a4cc5f74628818c91f3e729cc5eb81362b551c1c522236a300520 +size 18874368 diff --git a/params_shard_126.bin b/params_shard_126.bin new file mode 100644 index 0000000000000000000000000000000000000000..88892fbb82ecf6ecb5e0e7949ace1ad32205527a --- /dev/null +++ b/params_shard_126.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9f1e583a01aa3dbaa5a931bb4d561c7b44e2ced322230c9df2459da4a147701 +size 25989120 diff --git a/params_shard_127.bin b/params_shard_127.bin new file mode 100644 index 0000000000000000000000000000000000000000..b3edfcd28ca6239d96ad6605ed65d816d04a1ea9 --- /dev/null +++ b/params_shard_127.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ddb82f5c951757cf66c5f4ee6b79396880f1c522fe4e03f08b6db44e017cb95 +size 84934656 diff --git a/params_shard_128.bin b/params_shard_128.bin new file mode 100644 index 0000000000000000000000000000000000000000..098f447fe367f7ead1c784342bf424cdbdb836e9 --- /dev/null +++ b/params_shard_128.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:858505a52b9d6c21a57074ed42ed21a8630c6124e966c3c8260d60205fc4bf06 +size 169869312 diff --git a/params_shard_129.bin b/params_shard_129.bin new file mode 100644 index 0000000000000000000000000000000000000000..b7690c5718b44436d3f7d243ab2dbdacb06167c3 --- /dev/null +++ b/params_shard_129.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ebb6d2ffa41c3293e2da47177a6c93aa9a97e20c24e490deb58791acec68219 +size 21233664 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..15a328608e9133d2fee41a689a8d56a6a6cd19cb --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5508e9eef5dc05d64b7c868d236fef4c1561d32f146eca06fe7e924715d7705b +size 84934656 diff --git a/params_shard_130.bin b/params_shard_130.bin new file mode 100644 index 0000000000000000000000000000000000000000..9f78ee1f479f2b6a15f44c6d631b4e184564c871 --- /dev/null +++ b/params_shard_130.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb52db2d624ad7f9445a0e90888a9117acf2573ab2ceae0438a3e5766beb7842 +size 18874368 diff --git a/params_shard_131.bin b/params_shard_131.bin new file mode 100644 index 0000000000000000000000000000000000000000..59020414487a1527221c62b41f1a69383e0ee0ac --- /dev/null +++ b/params_shard_131.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9379749efecbf402f8a731d8263f580a315d1bb88f36663a4b3d34d75de22a1 +size 33067008 diff --git a/params_shard_132.bin b/params_shard_132.bin new file mode 100644 index 0000000000000000000000000000000000000000..0956e570308ee07698a615659b669c459b6dae9a --- /dev/null +++ b/params_shard_132.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44a2ed1206be94bcdd2987349e70fd04aa2f39ee9b25a0cfc238d05624031ec +size 84934656 diff --git a/params_shard_133.bin b/params_shard_133.bin new file mode 100644 index 0000000000000000000000000000000000000000..c251bba21ac71b4b8914ce5d41b8a828ece08bc6 --- /dev/null +++ b/params_shard_133.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84dba5f6a01394207c377acc27403dc54a80df8960c8a9a19325d72bac499a1c +size 33039360 diff --git a/params_shard_134.bin b/params_shard_134.bin new file mode 100644 index 0000000000000000000000000000000000000000..afc79b5c026a32ba747741c5c0d5abefa20beabc --- /dev/null +++ b/params_shard_134.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7b783fa34dbf52ad93b9caa9f73b9c6311f4a1f267da9ad88d376daf2cc1671 +size 169869312 diff --git a/params_shard_135.bin b/params_shard_135.bin new file mode 100644 index 0000000000000000000000000000000000000000..b4bad1d72174e0381cfb2a36ef5f6dae93fde4a4 --- /dev/null +++ b/params_shard_135.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:063da113c47dc030085633520110848045597d149047b08df5fa6443690e74ba +size 84934656 diff --git a/params_shard_136.bin b/params_shard_136.bin new file mode 100644 index 0000000000000000000000000000000000000000..29451f2934bc56ec04dd8e1929f47b7c630da199 --- /dev/null +++ b/params_shard_136.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f94df20dabf5113b10f7cd698ff8ee8c0b24d9bd8b019043c3e6b75e83b2e86 +size 31887360 diff --git a/params_shard_137.bin b/params_shard_137.bin new file mode 100644 index 0000000000000000000000000000000000000000..dd873389ffe276ff8fd6f8ad19b4fb38b231a5fb --- /dev/null +++ b/params_shard_137.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4ed261c7009da1a24d405afc4e71cea20f1996eeeb9b0f7bb62bc6668f8cd4c +size 169869312 diff --git a/params_shard_138.bin b/params_shard_138.bin new file mode 100644 index 0000000000000000000000000000000000000000..48ddcc7423f206380e4056c014a148dfccc459b2 --- /dev/null +++ b/params_shard_138.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97d9bc2cf86945cf793e54f03a69e19ef6c49b127b8dec37fd976f45649d6013 +size 18874368 diff --git a/params_shard_139.bin b/params_shard_139.bin new file mode 100644 index 0000000000000000000000000000000000000000..496a00fff17b2e6e2250645508539e339fba55d3 --- /dev/null +++ b/params_shard_139.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0653523f9d72f1678fea7a8fd112a6e6303939c87fe04ee56c14c9ab727f8b7 +size 31878144 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..dfcf60a5bf3f8f27e3ae30ff8da7fecd20681280 --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0dfd6330f3b81c579f0a2f585b797a88d244c18786844e871938ab7e9289aa0 +size 169869312 diff --git a/params_shard_140.bin b/params_shard_140.bin new file mode 100644 index 0000000000000000000000000000000000000000..4c0b43db9e284d09b3f2cbf8aa06f6e12ceaa29e --- /dev/null +++ b/params_shard_140.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39ff8dd3183a0b61dc221ad6a27be95a075b95df8965873bb8b9381857c8ce4a +size 84934656 diff --git a/params_shard_141.bin b/params_shard_141.bin new file mode 100644 index 0000000000000000000000000000000000000000..bf3cb52ad52e790f6b9af83cdf380684c4227a32 --- /dev/null +++ b/params_shard_141.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e32b47a412756bc763ff46da2967cfa395e68e075a0af4ebfe2a84a2b05fda5c +size 169869312 diff --git a/params_shard_142.bin b/params_shard_142.bin new file mode 100644 index 0000000000000000000000000000000000000000..14f83e753e9e81527deb2d500741c2f4fdb3eaf0 --- /dev/null +++ b/params_shard_142.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb231dac3e98d5dd2cfaa79e3a5ad1cd7963094dfd2b95bcc64901513cecbb08 +size 21233664 diff --git a/params_shard_143.bin b/params_shard_143.bin new file mode 100644 index 0000000000000000000000000000000000000000..b45e5c28c6d8c780148917a4e1c0109ab62d959f --- /dev/null +++ b/params_shard_143.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384a06a72759b5fdec0687a45c42387a9025cfd7dacdf41f98af162f0b6bad0c +size 18874368 diff --git a/params_shard_144.bin b/params_shard_144.bin new file mode 100644 index 0000000000000000000000000000000000000000..4a6facec2990a8d80dab12361f01af8cb2f74c7d --- /dev/null +++ b/params_shard_144.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8dbd7e9f99a2e1bb609ec264eef0abea186b22f733c4a63376823e4b37ca947 +size 25989120 diff --git a/params_shard_145.bin b/params_shard_145.bin new file mode 100644 index 0000000000000000000000000000000000000000..a35b72e7d8c709802ef8f9775359a41954ed54e4 --- /dev/null +++ b/params_shard_145.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bea9336cf6b5bb97529d46305ccfb6afee34b2a020b497b85ee429320d434ab +size 84934656 diff --git a/params_shard_146.bin b/params_shard_146.bin new file mode 100644 index 0000000000000000000000000000000000000000..00e081c0044afa7b0c61195c70b9a8f9b1cd7afb --- /dev/null +++ b/params_shard_146.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c205ade24778321b891e414bb7715007284364689c05163af8a8d52db3da7b5 +size 169869312 diff --git a/params_shard_147.bin b/params_shard_147.bin new file mode 100644 index 0000000000000000000000000000000000000000..bb292cf13823da1b1a16629cf7b71baecb7ccc70 --- /dev/null +++ b/params_shard_147.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14cb41ad17e5c8222b1f77782e8f287328c2436290c735c0cd88bc11e64afbe1 +size 21233664 diff --git a/params_shard_148.bin b/params_shard_148.bin new file mode 100644 index 0000000000000000000000000000000000000000..9e2504bd37acffecc639053362a31ab0bf87d38f --- /dev/null +++ b/params_shard_148.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc89feee772b999bcaa0adfe4d9edfe55fcec75b5e0e6e38cc5cc02042a2bbf6 +size 18874368 diff --git a/params_shard_149.bin b/params_shard_149.bin new file mode 100644 index 0000000000000000000000000000000000000000..1cddc92631d3492730d93cd1a7b2590d4abffe34 --- /dev/null +++ b/params_shard_149.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c979f07ba5d59fc96e93cb7efb6d96901354cab0b734ccb5b6290e26e5a78859 +size 33067008 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..f89d289a30deeb0e0f1844809f1191a4efba98c0 --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbc7d5d952cb4a22c9cbaea0c5754f73e6f54eb4a59bcd18acb39a57fa088d0a +size 21233664 diff --git a/params_shard_150.bin b/params_shard_150.bin new file mode 100644 index 0000000000000000000000000000000000000000..032fca24965c0ef1a5788ed96e0d12fa600dca96 --- /dev/null +++ b/params_shard_150.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed29d4e0dfd51d2ea145f52689d856a9bcb39fa9463840f886de276b4679a606 +size 169869312 diff --git a/params_shard_151.bin b/params_shard_151.bin new file mode 100644 index 0000000000000000000000000000000000000000..7e6e19e0da359b78eb4b66b5e34dff24f91ba692 --- /dev/null +++ b/params_shard_151.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:934d4c9b308081160cb95f6b9e0c61b5a39617a81b98efaa2d5ded011c4f670a +size 18874368 diff --git a/params_shard_152.bin b/params_shard_152.bin new file mode 100644 index 0000000000000000000000000000000000000000..151b606581f59b13a25484c3799c3200899fb07d --- /dev/null +++ b/params_shard_152.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15d00317c7a67840caea04f1bcf8c5edd1612949e3fc09cb21b35f8d98fa691f +size 24772608 diff --git a/params_shard_153.bin b/params_shard_153.bin new file mode 100644 index 0000000000000000000000000000000000000000..71d159c429842e0a28bd5148cfa993d370a44fb2 --- /dev/null +++ b/params_shard_153.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a278f3da5fc7d657e72fcf12f64fb4364aadc3a96e6dc3e32b8632ed8b01c15 +size 84934656 diff --git a/params_shard_154.bin b/params_shard_154.bin new file mode 100644 index 0000000000000000000000000000000000000000..12d473c7437d793eb0fc0a8ad65fcdb9a87140dc --- /dev/null +++ b/params_shard_154.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e07f5b8fde25065bef54b519d4dc811cc60238fc0fbd000559a849f88c4a9e1d +size 84934656 diff --git a/params_shard_155.bin b/params_shard_155.bin new file mode 100644 index 0000000000000000000000000000000000000000..4573675acccab90590c16c58c25602f61190d6ab --- /dev/null +++ b/params_shard_155.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd0110f53dc01a1f94c5ab37ad7eda6becf3b39bb9fc6b7b1e24410389e86c55 +size 169869312 diff --git a/params_shard_156.bin b/params_shard_156.bin new file mode 100644 index 0000000000000000000000000000000000000000..007336bdf1f1ddfa443019a41b318dba3ea372d0 --- /dev/null +++ b/params_shard_156.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da2f5373b508382fbc6ed9e7856c31afb74369b1b0f0036997e283a56760d1f0 +size 21233664 diff --git a/params_shard_157.bin b/params_shard_157.bin new file mode 100644 index 0000000000000000000000000000000000000000..8463d15a076f060ffd669ac10b7d2c8516f3145d --- /dev/null +++ b/params_shard_157.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:328d34c782cb39381a7f67f68a7e3350b02df4c6ded1e5b9f1172a89cf3536e2 +size 18874368 diff --git a/params_shard_158.bin b/params_shard_158.bin new file mode 100644 index 0000000000000000000000000000000000000000..e92b199c12f3ff61e8fd8868759a0dd93909ffeb --- /dev/null +++ b/params_shard_158.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ba8c55ba838f4371c0dc1af70952c3eea56a0d5bab4aa82e880c75acf41e822 +size 31924224 diff --git a/params_shard_159.bin b/params_shard_159.bin new file mode 100644 index 0000000000000000000000000000000000000000..7e11192e60451c197c6dc75d34f2ca28d6321e42 --- /dev/null +++ b/params_shard_159.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f50beffcf2667c1ccd51370e6955ab56ff4f7400500bd218db3d50b4f235f96e +size 84934656 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..6da0770c956809aa5fc8ef967253dca37c8dff17 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a83ca6760c164ba3dcc7a64d6118b4abf19525509d00d7419a3b4936a2be54ff +size 18874368 diff --git a/params_shard_160.bin b/params_shard_160.bin new file mode 100644 index 0000000000000000000000000000000000000000..be557abf98bd7b5d44fa381d5dc7ef712f09ee67 --- /dev/null +++ b/params_shard_160.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f358923ddbe821bfe829d494e61f3a0cc3a3b4dd7a68622debd92b353f431d5e +size 169869312 diff --git a/params_shard_161.bin b/params_shard_161.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea81a39bc4a12376fdbf2f8c11417961ded21c5c --- /dev/null +++ b/params_shard_161.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2848c9478c6361983bc5d8180f27b9b4a125a18c249d736005fcf3d97d89ba13 +size 21233664 diff --git a/params_shard_162.bin b/params_shard_162.bin new file mode 100644 index 0000000000000000000000000000000000000000..13a498d1b423d64ce0f549b906270180b75306ba --- /dev/null +++ b/params_shard_162.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:211b72a9ef96b3f565bfb5290a7a4c34d52c7c5accdc155876efcb8ed661b219 +size 18874368 diff --git a/params_shard_163.bin b/params_shard_163.bin new file mode 100644 index 0000000000000000000000000000000000000000..57c7d76c5ed84800efe2dab56f8f0a8735d829d8 --- /dev/null +++ b/params_shard_163.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb435be9ecfc1a6882c0d864c156a2ff02d5719e6deb166bbceaa7b6b221cdae +size 25989120 diff --git a/params_shard_164.bin b/params_shard_164.bin new file mode 100644 index 0000000000000000000000000000000000000000..3909a2bce9372c8413a5f0cfac83de1cc6a7fe9a --- /dev/null +++ b/params_shard_164.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddae3063402d487fead07a320334ac838d70de753920aea099ac0ed3ae539c51 +size 84934656 diff --git a/params_shard_165.bin b/params_shard_165.bin new file mode 100644 index 0000000000000000000000000000000000000000..b8faa54b6c9bb5e8ceccf760a96f7d95d9b1ea24 --- /dev/null +++ b/params_shard_165.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f22b9270b394885068094a08a722629b88e2b8fa36887406c471209bb4a160f +size 169869312 diff --git a/params_shard_166.bin b/params_shard_166.bin new file mode 100644 index 0000000000000000000000000000000000000000..cd407fe50b9c24e679eb15e0a0fa4153598f303f --- /dev/null +++ b/params_shard_166.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57bd0627554db248a4c14c620d546209ff49c4ffdf7a97587ac90b9e325f8144 +size 21233664 diff --git a/params_shard_167.bin b/params_shard_167.bin new file mode 100644 index 0000000000000000000000000000000000000000..c62d24b01f38860b5336a0256994bd1a0ca59caa --- /dev/null +++ b/params_shard_167.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4bc6bef038f70bf03d22a6925f575ea670b5d835f772301cd90dce0ebd83e67 +size 18874368 diff --git a/params_shard_168.bin b/params_shard_168.bin new file mode 100644 index 0000000000000000000000000000000000000000..b77e59abec09d19a1f7831fcc3ed4593a5e97434 --- /dev/null +++ b/params_shard_168.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d68537fd178fd2075429a2865bca11e73796ec72d1dfcc006167e2d4a46c8170 +size 33067008 diff --git a/params_shard_169.bin b/params_shard_169.bin new file mode 100644 index 0000000000000000000000000000000000000000..ab9b8c4be02f561162a0c333d06970e8aae3b74b --- /dev/null +++ b/params_shard_169.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c8ff3c589056513e72ee841ad16a3f4e48d9cdc566446c5b36e36c8cb9ac2c3 +size 169869312 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..8f0dc982a961817015ae0e3084050cb4a6179a60 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c617a2e491fdb6b29fad31a7ef620a770fd4fab8c6d239eae1eda8daabc0347a +size 31924224 diff --git a/params_shard_170.bin b/params_shard_170.bin new file mode 100644 index 0000000000000000000000000000000000000000..33e6686f611e5395a859841e922235e2fc07a6a3 --- /dev/null +++ b/params_shard_170.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:634e389308595b63803d858916184ee4b9972ce3689d7157c3ba8be24844a2c1 +size 18874368 diff --git a/params_shard_171.bin b/params_shard_171.bin new file mode 100644 index 0000000000000000000000000000000000000000..29fc1772d323673150e484fb816b80894ac540d2 --- /dev/null +++ b/params_shard_171.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7e1e7b54574ed84201195a98668f8232dca77cef89ad74a315ee19460da7007 +size 24772608 diff --git a/params_shard_172.bin b/params_shard_172.bin new file mode 100644 index 0000000000000000000000000000000000000000..3c6ab5549436a287250463c40b2d3fb2ae73437a --- /dev/null +++ b/params_shard_172.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38fb9bead4d1d85a3f519ade708645d419c288e35a87303fb7742f7f92381ec3 +size 84934656 diff --git a/params_shard_173.bin b/params_shard_173.bin new file mode 100644 index 0000000000000000000000000000000000000000..2741bd14a760d37f4bbefeae30c4a736bcb65b45 --- /dev/null +++ b/params_shard_173.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c68f5725b2f9bdf2a4211349c6ee442deb1b02565618b287d73501981641f483 +size 84934656 diff --git a/params_shard_174.bin b/params_shard_174.bin new file mode 100644 index 0000000000000000000000000000000000000000..2f1aab21825d36b21b2ed1fd40840df5f5739da2 --- /dev/null +++ b/params_shard_174.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:376381d7faeee941dce4dd58af8b5b62bf08e7a8f953876a25d6dfb54da684a9 +size 169869312 diff --git a/params_shard_175.bin b/params_shard_175.bin new file mode 100644 index 0000000000000000000000000000000000000000..0fc1489b36c299c1d9dcf4f511e0a0852c09ee48 --- /dev/null +++ b/params_shard_175.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:567a9bc0c06fb69f798b604f5d556aeaf5013f58b0f708ac505874e015560f70 +size 21233664 diff --git a/params_shard_176.bin b/params_shard_176.bin new file mode 100644 index 0000000000000000000000000000000000000000..9178683d9ebefc84b52a112e10d6a9ef02f3d1cc --- /dev/null +++ b/params_shard_176.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7efea8454ccca1b1aa2cfce8affaad4fb4e3995ce93e18acf684ac33a291ab9d +size 18874368 diff --git a/params_shard_177.bin b/params_shard_177.bin new file mode 100644 index 0000000000000000000000000000000000000000..1acdafe1bd73425b08f49826e263c4ee2b7d0979 --- /dev/null +++ b/params_shard_177.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc7e1f154751febc3fe44c5d96b6b81e89b0590c4c54502f834ff3af63f040aa +size 31924224 diff --git a/params_shard_178.bin b/params_shard_178.bin new file mode 100644 index 0000000000000000000000000000000000000000..1f5f2194d7f487ee1893c4effdd76aaccc8f00d0 --- /dev/null +++ b/params_shard_178.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6120784baa155732086e9980cb3d79bfceb06ed331dc1e7a9b87cf9803c4ccb7 +size 84934656 diff --git a/params_shard_179.bin b/params_shard_179.bin new file mode 100644 index 0000000000000000000000000000000000000000..23648517c5eb078345b303034d96a423fc83290d --- /dev/null +++ b/params_shard_179.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c096c8b33d781ef57c70735bfc73b1210493681bccf451126b128eb594bd4f4e +size 169869312 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fc46fef2e97a57b1e5946d90ff3bbc14bc35a3b --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43adcd903c75ae36cc741fccb29788cede6b98b49b678f0aa80e72ddb1cb6f77 +size 84934656 diff --git a/params_shard_180.bin b/params_shard_180.bin new file mode 100644 index 0000000000000000000000000000000000000000..9cee40c7807425683633e6692690a5ab5a424479 --- /dev/null +++ b/params_shard_180.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f594652b28c98f434a5663207c0dbb4f4a6c18311e05fda49f0c59ea64154ea +size 21233664 diff --git a/params_shard_181.bin b/params_shard_181.bin new file mode 100644 index 0000000000000000000000000000000000000000..832a057f394537b958d8db61fdca706d40c85b19 --- /dev/null +++ b/params_shard_181.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53ce25a4979a7f967279245cc55f1d0bc77854a5ad92e521b95de9a79c1bcf0b +size 18874368 diff --git a/params_shard_182.bin b/params_shard_182.bin new file mode 100644 index 0000000000000000000000000000000000000000..6f1838541fc57f3351eded68e8d45cbe1f0cb7ea --- /dev/null +++ b/params_shard_182.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f5e232b225080e1240a83a24acfb48bcdcdc1b410d08bb812c7cd47c2e5f96e +size 25989120 diff --git a/params_shard_183.bin b/params_shard_183.bin new file mode 100644 index 0000000000000000000000000000000000000000..f046dcd8db8247d0c6e84e5e0c526ecacb065222 --- /dev/null +++ b/params_shard_183.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:549053806dccbe6af5b3326db337b1f01241deb56638f9647cee24b8f96e1d70 +size 84934656 diff --git a/params_shard_184.bin b/params_shard_184.bin new file mode 100644 index 0000000000000000000000000000000000000000..cf9f4be572c7223ea477b527ba171b3aa01f9eec --- /dev/null +++ b/params_shard_184.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91c2d006ba353ef4adc7a732c0bb82286826d9dddd9540d80a722c136346bf23 +size 169869312 diff --git a/params_shard_185.bin b/params_shard_185.bin new file mode 100644 index 0000000000000000000000000000000000000000..947ec072f4a3d0fbe1f62b62f3d8b1d5bdaaf7ee --- /dev/null +++ b/params_shard_185.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fce7827f4a2c07de20f8a66ee969d7cc00defb8292fd549198e29ddf17391c4c +size 21233664 diff --git a/params_shard_186.bin b/params_shard_186.bin new file mode 100644 index 0000000000000000000000000000000000000000..1c21994bc219cffcb848c8b874c2a3b246363107 --- /dev/null +++ b/params_shard_186.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:968f3d837c679aa3e1f452e016166306ce07a060cb335ef896f9ddd6a81402e6 +size 18874368 diff --git a/params_shard_187.bin b/params_shard_187.bin new file mode 100644 index 0000000000000000000000000000000000000000..257e6d60e8711f96491a0b34f22fa2c1a0f1d66a --- /dev/null +++ b/params_shard_187.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bef49cf1943226d3a4f25608a0a8ac322400b586283963acbcb9284aef14e148 +size 33067008 diff --git a/params_shard_188.bin b/params_shard_188.bin new file mode 100644 index 0000000000000000000000000000000000000000..25609516e5df6eb3a5508eaa18ce6fcc041a0217 --- /dev/null +++ b/params_shard_188.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2df07accf56fa06d0bdd323e07b5a1b53e4489f23b582b2504666e2fb0b3f915 +size 84934656 diff --git a/params_shard_189.bin b/params_shard_189.bin new file mode 100644 index 0000000000000000000000000000000000000000..a93e0b4ca0fc65379943ce6cacb63a94097d5f11 --- /dev/null +++ b/params_shard_189.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b1e423511b71d1127a70dc3471b0e847f11902e80cf9365fec46c1bcdf05e4c +size 169869312 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..62ccf6abfe2cc2fcd6703df5f1a101641d3ade3d --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46305738a4ee6f57bdea62819421242a4721e61c363c6ce9b63723286550f7be +size 169869312 diff --git a/params_shard_190.bin b/params_shard_190.bin new file mode 100644 index 0000000000000000000000000000000000000000..d6f87ceef1dc5f4d2ffe2ac688f070ec42cf4454 --- /dev/null +++ b/params_shard_190.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e5441005b49ee60ec6a9759d310012002b9a5021ef1d443c842f0352d05de86 +size 18874368 diff --git a/params_shard_191.bin b/params_shard_191.bin new file mode 100644 index 0000000000000000000000000000000000000000..b7843160da7d0176f524364c1fe607f1e8f12f27 --- /dev/null +++ b/params_shard_191.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6727431b11ff636df5c9abbf3c04eaf62236e9b0e8f141691256daa78e2223e +size 33067008 diff --git a/params_shard_192.bin b/params_shard_192.bin new file mode 100644 index 0000000000000000000000000000000000000000..6acecb193745ad11ceef3c82725ea3c2ac6fcfb9 --- /dev/null +++ b/params_shard_192.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc4ae384ff7438b98edde2662a0f139889676b131e84c7ae5b5aba58ac683874 +size 31850496 diff --git a/params_shard_193.bin b/params_shard_193.bin new file mode 100644 index 0000000000000000000000000000000000000000..00b15b11d70a9f1fca9a4429a9f933fbbd038c33 --- /dev/null +++ b/params_shard_193.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ed7867884d0585c8bdc372cfea66be562ff18ba6128c06769b2f143e6fa3a0 +size 84934656 diff --git a/params_shard_194.bin b/params_shard_194.bin new file mode 100644 index 0000000000000000000000000000000000000000..05bc29c8627f33a8c0640b7c96538c393e28d42a --- /dev/null +++ b/params_shard_194.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ecd7d4bbefbd6582085d33a607b21d81c34640e37af054ce62541225f65183d +size 169869312 diff --git a/params_shard_195.bin b/params_shard_195.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb226c89c855cc377be33c4929cbb7c5c8941750 --- /dev/null +++ b/params_shard_195.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cebafcb3fbc0cb6adc4d14828121c9aba0ad0660ff1b795d458bc10dbe7a35cb +size 21233664 diff --git a/params_shard_196.bin b/params_shard_196.bin new file mode 100644 index 0000000000000000000000000000000000000000..f17e60e0ce1db83cb552b8b43c7b91bcde26479c --- /dev/null +++ b/params_shard_196.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb514dd31f063816196e1b1ce5405963954b9f8d1aa818093d18c09e672f462b +size 84934656 diff --git a/params_shard_197.bin b/params_shard_197.bin new file mode 100644 index 0000000000000000000000000000000000000000..4724bd6661ab50ed45f2a5b47dcad9f7f7598ba6 --- /dev/null +++ b/params_shard_197.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a34c09de972ff671ea5a3206c829ea18a7d3834c25d1107bad9262c11ab30ae4 +size 23639040 diff --git a/params_shard_198.bin b/params_shard_198.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0369d55514665a1659bde6fc24632ae2cc8908e --- /dev/null +++ b/params_shard_198.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b17c171eb5d508f024f2838c46c6a79009d01467ea4e44a7c7fd66421a7bbf3 +size 169869312 diff --git a/params_shard_199.bin b/params_shard_199.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c579c6571e64441de2f6175cd4448b4949eda71 --- /dev/null +++ b/params_shard_199.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4a443efa9b9a16c3ea0ac43b60a363a8a6dfdb232546d8c0d1e32e22d7d0028 +size 18874368 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..4422d914de8d71bdaab1d270a1b97116194a7ca0 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a166f8c473c6e776acd87783d543876ee4a551cfd5d2ea6e5f7dea7117374111 +size 84934656 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..8dfe11c388edadb78da681bca11be61e8f612d62 --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b43fd987e1060eaf14075444c7dc49ae4109cfe89f2bd7e4eba1e3796b2171 +size 21233664 diff --git a/params_shard_200.bin b/params_shard_200.bin new file mode 100644 index 0000000000000000000000000000000000000000..d2686f8855a70f52d4618602f3993cee7f778278 --- /dev/null +++ b/params_shard_200.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2a3a091c88002e411514c56abe4f5febbe278d551716247d321077f62bfe0f6 +size 31878144 diff --git a/params_shard_201.bin b/params_shard_201.bin new file mode 100644 index 0000000000000000000000000000000000000000..2725daadade352ead3163a3446933792e339a933 --- /dev/null +++ b/params_shard_201.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db36c1a9ba269bc66fc89f9720ec7a0fa7c02314d1762fb8f9ef25b29b021a3d +size 84934656 diff --git a/params_shard_202.bin b/params_shard_202.bin new file mode 100644 index 0000000000000000000000000000000000000000..452b8d5f5fa7d920c0eaf4d3ade640f1e91c3755 --- /dev/null +++ b/params_shard_202.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b224c9894718ce5dcc9164e6a0f16dfeb6d91701e4393005cc1111b4de0ace6 +size 169869312 diff --git a/params_shard_203.bin b/params_shard_203.bin new file mode 100644 index 0000000000000000000000000000000000000000..3cd695334e591895367f2f5ada5b82188c1cee59 --- /dev/null +++ b/params_shard_203.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10e8b51d54da934534c5a4682f350623c388f0560a67979b68e70d4457176e08 +size 21233664 diff --git a/params_shard_204.bin b/params_shard_204.bin new file mode 100644 index 0000000000000000000000000000000000000000..132eb5a905fb1a88d6b125fb4a983a9d5c9ed213 --- /dev/null +++ b/params_shard_204.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e74d82d68de8e04c872f0b782cd215ec88daf805d4a10f159471cedc4f46cc59 +size 18874368 diff --git a/params_shard_205.bin b/params_shard_205.bin new file mode 100644 index 0000000000000000000000000000000000000000..1293bf117811bb9928c3489d18cde2261e19a22a --- /dev/null +++ b/params_shard_205.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7feaae25a161fbec1b2bfffde1a66bc13f2b61cccdbb4486e46cb7102b63f5cd +size 25989120 diff --git a/params_shard_206.bin b/params_shard_206.bin new file mode 100644 index 0000000000000000000000000000000000000000..cf7ca985cd85fdf66122ebec370441d97e45ddde --- /dev/null +++ b/params_shard_206.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b2eeaa0952542cdb7dad7e46eb356a45f7d82e92236def121f1f32690197fe +size 84934656 diff --git a/params_shard_207.bin b/params_shard_207.bin new file mode 100644 index 0000000000000000000000000000000000000000..f18c0aa009bfa6a620ad26f81b07cc684050610d --- /dev/null +++ b/params_shard_207.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d2bc8848a539cad7e0e3ce6b48d7ae7c9df10f85fe573a1add5a097ec67d10 +size 169869312 diff --git a/params_shard_208.bin b/params_shard_208.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8cd0f33f6988018b497883cabffd70293e7efff --- /dev/null +++ b/params_shard_208.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66e07937286c9c4ebf3b943a3aa451eefd9c2c40e93d5b2525cd5b5a9bbfdf51 +size 21233664 diff --git a/params_shard_209.bin b/params_shard_209.bin new file mode 100644 index 0000000000000000000000000000000000000000..2c81b5f866eaea2f6521ef7104d5f8af90771ecc --- /dev/null +++ b/params_shard_209.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1fd1af92a32cedf24b5094084b536f92c35db6eb3f18caa0b57dd64517cc8c2 +size 18874368 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea83a7de1215b5e88a5dbdebcf56317726216a4d --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d90e12de7ab67cc9e5b98b6fff17c3370736bc7cda98fd8480f77071860efe9 +size 18874368 diff --git a/params_shard_210.bin b/params_shard_210.bin new file mode 100644 index 0000000000000000000000000000000000000000..bccdfd87ca6af3bd6f8423bf45b26c00aa689f54 --- /dev/null +++ b/params_shard_210.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cca4ba3b1f6312724909a472a7e3f6a7f7345496d5fef2eabb04925538d93c5 +size 33067008 diff --git a/params_shard_211.bin b/params_shard_211.bin new file mode 100644 index 0000000000000000000000000000000000000000..1101c0bd7721fca8aa08572c52394ee4f175b2d3 --- /dev/null +++ b/params_shard_211.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29af4d6aaedb90cb72fc4a8a4afa360afde8f28581bfe4bc2b120e8fc0ae609a +size 169869312 diff --git a/params_shard_212.bin b/params_shard_212.bin new file mode 100644 index 0000000000000000000000000000000000000000..5c5577e7f151b11006c28fb0db72f0b2b888f76f --- /dev/null +++ b/params_shard_212.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa355f8cb2f8b0726692f0a4a7b03ed3c8886e52fa94a81892afbcff709a68dd +size 18874368 diff --git a/params_shard_213.bin b/params_shard_213.bin new file mode 100644 index 0000000000000000000000000000000000000000..dc255a9d237a15c48c0dd5950e80f0c4955d8d88 --- /dev/null +++ b/params_shard_213.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4fd89c38024a8be3e95f04e5725a1dd3e632ce180aad9b84d960cf4d0bcfad8 +size 24772608 diff --git a/params_shard_214.bin b/params_shard_214.bin new file mode 100644 index 0000000000000000000000000000000000000000..b7f7ba092d88c939569719e21a22ba7fda50f3cd --- /dev/null +++ b/params_shard_214.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaa4b7ab119bdff052fccc5ae803d95674400c57137105cbfbdf2ede4e237f4b +size 84934656 diff --git a/params_shard_215.bin b/params_shard_215.bin new file mode 100644 index 0000000000000000000000000000000000000000..4d6004d89a338336c9cf756fa604569a66eb6820 --- /dev/null +++ b/params_shard_215.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18b7ed496d07107781767cd12efde5be75ba45841d5eaec80eb68de1e776edb5 +size 21279744 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..9dcc72a64f3f240efc8c6c130d62a6e84eafcd47 --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da23a10d55802b23fafd6bf06f4de3b1d0f9aac6ed828808e21f1ebfe38cd229 +size 25989120 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..7326c0cdb826342eec4ddf5c97eca089432654b8 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62e47c1aaa8da163e5ae230d49ab9fd288099297b4800b6566c4db6091eeab0d +size 84934656 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..1e893ef38f2e518deb7d03f12c52f33f4de2325a --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5253b4a9e943963250fb9dee2418f8bd0d25f258a193faa4cd01de5c8e28b7c +size 169869312 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a6a1cc35262329823494c864a0472d87e4725ea --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce2780d0c65d692a82e2da616666cb517840c910f100475c1b079dbb8b363e09 +size 21233664 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..4891faac7e21dc1f6e4a5dc6a36cba25e3043dc4 --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88c7288f6be6ff8eb29bd78a6416b12d7826642df1e903a582e2b8946a6e61cc +size 18874368 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..dbd0ab5842914807db23b00951c6cce8492843f4 --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e79b51c013e014fb569d16077666fe6f90b035b90b042dfb02ad14e7fa750c0c +size 33067008 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..6cea39cd52b64ed4297b9313c6ea4475508522a5 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41c48fe613fd151d44f22dba146d3247e348011fa9531f702df219c838814a4a +size 84934656 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..5fff0b37d525a284c2613c863094cd5112988ba8 --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5fc4c1c7ff3eb15b860b8da8a46c23e4d143898cb2e76f98dfefc8a42ae6747 +size 169869312 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..5c4f8ffda1c0400fb4529e0dd2d89fdc1eaeff8e --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:491119424bb048ce52c5ae89c4132ec77ca64c676bd8a87a7dbb6453a201e1c5 +size 169869312 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e0d0abfc3f4cd372ab9a351b336651427473969 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e39e967d7b473472ed8bc1d7b8925fb3792d7866e570f6e2dfde54d8fa77d71a +size 18874368 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..14c055fccfea76759b2212cf9cd99a2ed47621e1 --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:320f5b948f8cc312483ddc670a81c5f9fd60b274d976881d744aaf976328cf94 +size 33067008 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..0cf65e34f9658aa5231b9ac0aaffc8240471d4fd --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4fae57d327bce77127f715703de5f3c5034b5cca1c90511f6c56d3cfb25138e +size 31850496 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..6af36599361e2b690fec4448caee4845dad0cac1 --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dc821cb1cfdc3c4d0e0e07403e4d384b72465b4fc9350d19e6fc62e64a6ae9c +size 169869312 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..1a1194692147412149c9e939bfa226a7975d0245 --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15463150713c173a063f5b3e4063099a9414cb3ae5da9a74523995f5ca2b901c +size 21233664 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..dabca57518e66bec26f11aa0c197845cbc1b22f5 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23e56f1aa75be4a84d4053304d1afae18b718806e331cfdf820af945f6009a68 +size 31850496 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..6439ed8c3231dba75925ba900041576a5ad2f8ff --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77ec7908fa0ae3d2d984d0495aaaf686250014f583e08f55f3fea644a874ba6a +size 84934656 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..27f055f54ccbaf5ba74feb2d3a94bef4392c198e --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e713f7d0556cd65cf1996f719526c3d85e78911d3510c340dae167834831fa7a +size 169869312 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..202a5b85a6098755ab7f895835419f35ccb74926 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b82930faae50091d3321462c8bdd1a9a287715702bc897fa885d52d27bf8e6db +size 21233664 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..31e2d93a376b9da8c50295fc22f83fcf1022d6d8 --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80bba46d40d70d3a9fa5df1ca7b28ed806c0a3f409a02ec917f7a0641e2ccb04 +size 84934656 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..ecba4d61cd5108888ea3a30e3f19628284ff034c --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01f9c5fa2039fb37cc24909440897873ec096e8c800450473bcb8d0aa9e395d4 +size 18874368 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0bb8eb9038a8e08daf5d067a2cbdcfb085d24ed --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:037fbbe1c99d9a8b6c8e889cac9bee8a1475f1d9874579fd131d8a3f3f5333ef +size 23639040 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..0ee8c3d792e60a1b112884ca087e8b43da5cd3a7 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8cc8599e3a28b769dac21b8ce7071d9067742b44a58cd7b808be8b38a48f373 +size 169869312 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..ed50b1bb6f69976977277292eb5a357d7011c95a --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c6e87c683c441aa4fcc6750caa8b29e34a6150e688b8f54bd9d7cc4ad3a7403 +size 18874368 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..4a7716b66a45ce3c317f260ad12943c85e9c6593 --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:654659a346400c87a5969599e736b4fc41a63c79f14c52925a361d849fa5de15 +size 31878144 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..f35ed27622218bb95adca367b176e312953bdd16 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a4ffdcf75805736db81e4ac8a0b73c349fdeed0875c1c031ca65bd6b3b7d305 +size 84934656 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..c6f77db7128ecd65c28036e66c4c9e6520e0c75c --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67a33620ccd4f249de8c992e08a6fbfdd0ad9e3c039eb987bf8ef314a49f5916 +size 169869312 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..3ea87f4d9b1e7f688962a7f0681fd2d6f09bd543 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70bb82e3ba6d16fabb7e2c1493d189a75b3f6aae4abde75aa13aaaf96857b59b +size 21233664 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..fbbd4af2e4d945427bb0ee52fe8d328893710576 --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45960065a409586414ae75c97b563606c3f88e09468c0590cb417b3f1728cdb4 +size 18874368 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..96d141160232fc4fb62c5aa8ee4007afbf858796 --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1359e8e1affe5dca9a2352c04a6e5822e364252c69e92c27e3ad57906bccad82 +size 25989120 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..4214923103f851643b4670237b1d4f5137dfe371 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cda6cffc9d9e5df65b265601e3dd60eeb31f2cef0d5d306ebc80728cc796147c +size 84934656 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..366706455e0901385060a86a11efca0a8666902b --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad50ec6a17b9bd1e29501398c9bf1082b4c40830dd55c8163e662aaf78ec7ee3 +size 31887360 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..8e32f7901d708c6ced6003da0d7ad8e2447b0e9d --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01443129c62b24f2a16fb8c136792bb83b975b198b175033749a2c4033871883 +size 169869312 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..faa93d6c32c9827bfef4864084ee7c3195b2d1c3 --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fd18acd7e29a0a52de1f01a857a38dd8e5b8441d6b2c176009562e8946a5d3e +size 21233664 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea3bc00081f0ff13a64c1ffd70b4a5807ef24913 --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a714436d41279c92a2aa78e3ffd905d9bf51228cf08575f8d390a5ad1e43d9a +size 18874368 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..58b299baa194279392db80e99697b1a8a363636c --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9fee63dce8a87c4a6dea2acf3fa336726cc4c6995c054667a0afb069a7472be +size 33067008 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..7532e9ada745c514def6c6d4806c5f9a8fce2c45 --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c355f0355d8427eb5f69c77147f83b5e724366a7ab7a98f14e5892fb1e38cf56 +size 84934656 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..3cf91f2be9b2047989c71f84f1542c54f29d049e --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:050464b67861b8abb842388ba6be6b6a38638ba8a717c9eaff5295ee00cda0b9 +size 169869312 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..90805fc2a18811cc6220f17b9a59b01870880094 --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53694b015f2afdd3dd3c79e36623c2e0cc3f3986c829651cc4616aa449327b0c +size 84934656 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..868527c7527eff44f179d9f9683245de907585fc --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:683bb1177311d6eb8d084347b98d8030e5a17d4b3f90da3eba0e65cfb7ee7da5 +size 33076224 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..32b6bf26176927c49a7864c5b3dd7db8c40457d7 --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb8aacf8a00cb01b1c7e4c88ebedeef4090315818d87cde92c0bd3988272c7f3 +size 169869312 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..e138419193dcc415aea31f737bd8019a2824e28b --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b38b0ea8f886eaace9950665094ce590be9fb2dd1c2414f554be07a1ebb5658 +size 18874368 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..71874e5a9cbc0c420b55b6164f35fa11300f063e --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:304545eb88884645715079c0684f6d75cceff9d975ef79a4c70d4bd52589ee07 +size 84934656 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..cc34b32f09af67ca3a040188e210089a943513e1 --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31588f1bf8782e3cc7b3fddf1e60aa6383e225d8bbd742f312642269e7cebd88 +size 31878144 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..0818d1d97e41ab947271fbbf8ee10dc22bc637ab --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12941d6032094cdffb05721a5bad77c2c50e5b63f594b682a59f7b0e6166d005 +size 84934656 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..7101430bc6d9c7b77962dec8bf02fe6a414dd402 --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72dfb0a22bfe7c3aa519e9adc223ddcb1d6d34d15d5232661c4f04869b96f664 +size 169869312 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..4214d729d4936b1fcef23d874885fd78751e06ff --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d90a9a16706d6742371b7ae9fe14cf2e6eb1c6cdffb93c5e5af79875c27d875 +size 21233664 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..0ff2dfe0b0f4a54c2dde17fb3d00b27927399fd5 --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa86f950ce0a0f28059593a2ef0ee3fb357fbce740496d65697b8204c5aa5a2 +size 18874368 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..b323183d1bdd06efc07bb6ba70727dea2559e9c1 --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20cd4fe1c4daffd9ac27251c35fdb6775e55d428aef132ef5800cfb655f5a274 +size 25989120 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..4ac64e2946ad9dae300b0f43622afb1e1dc536eb --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8c37ff63157ebd2a163d3318f000f7b60bb94a2736dbb40914e01218124c5f3 +size 84934656 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..63d0f55815fbfa936552492d3608211ee70f5299 --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d31b3457ca25500db75d22d491489d0a982193d77a38c03d6e8379d9aef00613 +size 169869312 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..607ea1b25c58c49f7532f402deaea3ebf67c3a99 --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55c6e417d1002317793f772241b976018e47a1d2d51acf7c7241fe926dddf00b +size 21233664 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..cf2d9c2fb0bca6b6ad9eec080b831ff1eaaa2160 --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78d3c947eb2c530026eff25dc01bbd504fdedb55d199445597acec338019b90f +size 18874368 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..0404806815a5767a7936c9e82ddd766edc52c280 --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96f001ecb5265ab56a3be3057f1e2590eae4c3826c5b529149fa468bffce2537 +size 169869312 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..ff27efe9ca19472d9c81ada615bf8418bdb41c28 --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0b30520221c231d5d4d125eeee141457253a45a91373d5b7405140e2839fff4 +size 33067008 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..7e5bd24a7fc1a363db6fc86cc999d59e4caaf466 --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a22f86b007c3b5d71362ca2e85860750c45eae7211594ece65442cfd5bf950f +size 169869312 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..cfb5734b6818adde480aec55bad1693c68b17ab0 --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c70d04d30fb6722976f42f28ec7dfd0f5139f58cb960f873e1ca67d4310e33b +size 18874368 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..b96da91e602637ceddbad4e1ab3269ce837cd4b5 --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2de6a582d2ea363ccb38cf4cba318983d8cf9ef2c3146799ea275780ffca1a8f +size 24772608 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..e67be8224df48926b8f7ee063d5b0e95e6b39b64 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65691608f6d9e7ba191d04405d59b57f773fa918c936bc40b01c79c40ac6ea53 +size 84934656 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..98f302035167efa08f373700b8648e8e2360d493 --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7007d8049df069445940cc793d1c0944999ba1aebc0f70fc9027c44f956416a7 +size 84934656 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..4ede9c8e7578dd3a1c61a3e8f9128daede635e2b --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be050b163e5eea92edeef3abc4a4591545e0e27eebb9507bb7d46c572436e85f +size 169869312 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..a9713d65fddb14c3ca48db3adf1023bff3c9924a --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e28f8d9738c319ef45bfda98200d50febfd1942b75c9e9180dda2dbb687e8630 +size 21233664 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..a04697d9f509692b6a667d60e3d3fc16a5ad0cf4 --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350e7e8dbbe5f41385ac150205780576ce132d0945b2aa4a0b34c7dab46b6198 +size 18874368 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..9f6bf7afe41342aefa683739812e86c27fecd513 --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90c0636c58509ef4fc206eae44b03f3823b27173db716d68f6225736f983f89b +size 31924224 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..6beb67b112be3447cd0c5ec21b4b9b8533525f59 --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:150189d1446fd2484a58fdea8c0f8c5d6fd7c70e98f16c59c816ebe6a5a05ba5 +size 21233664 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..31718e30a21469241c9c048956a6998972a42016 --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347e4525c315dbbccd1e401eb934ca086f215527640e7c635cacedaa49fff06e +size 84934656 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..e10df653d3699d6e5d2c9761d770d31ef57adf89 --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:735645916e01a9be8bbae49ebbffd254b093fc78b13470b3b9af1bf1b6cbe987 +size 169869312 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..8ff07951ce04960617cafe4ea15cee7714033798 --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b8da770ca131b08e64b2f84538b0d2a34e9a461ee57cbdf283c36562aa2a515 +size 21233664 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..7411374447343b49c34a4f5d963c1d93504e156f --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dde27e27da9317c16c8b1635bf627e851d8603cd26259823e3a26ea93465827 +size 18874368 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..e0a17a1b3540d6f9ed26fbfae6f1aa395815fdaf --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb2bae062df3d83746868bb4311eb541335a7121b2851ddafcb2e0dd8298927b +size 25989120 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..9e2ff0fbc554f3df6484c5e1677f57f0587d8ea8 --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f082c3a24447d4ea43657a501b9e41b199eb25b69fd56349d2b6b7844dc19363 +size 84934656 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..3febd10178a73ae48b8ece1a4df8244206e67cf1 --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8e5311b0cb8d78f599514c7c10892a5399129555fc9527f58500e12c96e94cc +size 169869312 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..7d1436f38e8c6087e27f673f217d3c8e689532b3 --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9747c9408345da0edc433ea477ee3ac1c88d52096ada7aece4a9c1933f05b8e3 +size 21233664 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..440426517318f24119f13c529ea8f9484c45f573 --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4264de4ee5ae184dff888d8a0ae2508f9882300ff52c5fd8f5c45fe1d80cad1a +size 18874368 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..204b51ad757e20c34b4e76d970c255d563ec3ed3 --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56e4c41d92383cb4521b9e710e16cf03f1e2d6371af4bff8c139adffbafb8fcd +size 33067008 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..5b2d815a4fce535da4985ec5205d171e9c32e7ac --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97321e3ab038aaff72048a8bdbefebe134153c59c4bf75585d5b05cc0776df0a +size 18874368 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..52a9739723a1c7eec12d854240079e42de9bd630 --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:403653eeade47db2003d78dbe8c471ddebd4bcf672dcabe4cdbe0e03bbd1dadb +size 169869312 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..4c04b6cee1c2ec41e4176b8118b8d54ae197cdb0 --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fb14906b73dd6cfb7c846b3c8a74659f60b85ca12907a596cdf80f200ba82fa +size 18874368 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..0815c71a2b3d338e5e29f3065c7fa6809d873bc8 --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e501ad813d5c526adf07c426143848e628d8e17af805292f952f8cf796a9ba56 +size 24772608 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..de194ff170a615fd7039effc949d8c19a2311197 --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:021253f54474f2e9b96aa470ba1c76eb3c4af7b674a606dc488fc66fe9aadd9a +size 84934656 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 0000000000000000000000000000000000000000..7a121e6d3c86f48d68401e198f56e3e824137d53 --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b20d75d294ff3273f838c88085d18981aabf7035942eb2988285693904765b96 +size 169869312 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..1249c1e28a8e2d3975457aa5440e2716db37424c --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22267b0042b51f9f13f736cfcdb75fda4329ba5c8932ebd2fb8d005ae08e05e0 +size 21233664 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..94fa2044c7ee0d08b6353f41a2e54b056a679b50 --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038190081cfc7cbd3aee82675124ae9d08d34be8d924813bdfc27352d1fde3c4 +size 84934656 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..87124eeaabc120cfd0bda3c84d8a4dbc68eaef14 --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79cfae45c6ccd104845d1c70abb91ff0876cef97bab79e92cacf891bfefddca0 +size 169869312 diff --git a/params_shard_98.bin b/params_shard_98.bin new file mode 100644 index 0000000000000000000000000000000000000000..e004ca85826afabdcac1853d98915b40f69afbba --- /dev/null +++ b/params_shard_98.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:188627fc778d4ee609f34c4f88ebf45800033f4d39f319c9cee0dac41c6c134e +size 21233664 diff --git a/params_shard_99.bin b/params_shard_99.bin new file mode 100644 index 0000000000000000000000000000000000000000..5084b4c3feea6f008512d2ec1c5b8fb737142116 --- /dev/null +++ b/params_shard_99.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e79c6e328cc0415eb68969c748ec0cf31acbc2154fafbf7cf90776fc195dc033 +size 18874368 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..5771f48b1e9b53a3865929ed27275c483186c9d7 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da53ca29fb16f6b2489482fc0bc6a394162cdab14d12764a1755ebc583fea79 +size 17518525 diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0e9edce2d1a5e6a5b002865aeade7f475ecf087d --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +}