Felladrin committed on
Commit eaf1f51
1 Parent(s): f5c6da8

Upload folder using huggingface_hub
README.md ADDED
@@ -0,0 +1,6 @@
+ ---
+ base_model: h2oai/h2o-danube3-500m-chat
+ ---
+
+ [MLC](https://llm.mlc.ai/) version of [h2oai/h2o-danube3-500m-chat](https://huggingface.co/h2oai/h2o-danube3-500m-chat), using `q4f16_1` quantization.
+
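Weights packaged in this layout are normally consumed through an MLC LLM runtime rather than loaded directly. The snippet below is a minimal sketch using the `mlc_llm` Python package's `MLCEngine` API; the `HF://` repository id is a placeholder for this repo's actual path, and the exact API surface may differ between MLC LLM releases.

```python
from mlc_llm import MLCEngine

# Placeholder repository id; substitute the actual "user/repo" path of this upload.
model = "HF://<user>/<this-q4f16_1-MLC-repo>"

engine = MLCEngine(model)

# Stream a single chat completion through the OpenAI-style interface.
for response in engine.chat.completions.create(
    messages=[{"role": "user", "content": "Tell me about the H2O Danube models."}],
    model=model,
    stream=True,
):
    for choice in response.choices:
        print(choice.delta.content or "", end="", flush=True)

engine.terminate()
```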
mlc-chat-config.json ADDED
@@ -0,0 +1,78 @@
+ {
+   "version": "0.1.0",
+   "model_type": "llama",
+   "quantization": "q4f16_1",
+   "model_config": {
+     "hidden_size": 1536,
+     "intermediate_size": 4096,
+     "num_attention_heads": 16,
+     "num_hidden_layers": 16,
+     "rms_norm_eps": 1e-05,
+     "vocab_size": 32000,
+     "tie_word_embeddings": false,
+     "position_embedding_base": 100000,
+     "rope_scaling": null,
+     "context_window_size": 8192,
+     "prefill_chunk_size": 2048,
+     "num_key_value_heads": 8,
+     "head_dim": 96,
+     "tensor_parallel_shards": 1,
+     "pipeline_parallel_stages": 1,
+     "max_batch_size": 80
+   },
+   "vocab_size": 32000,
+   "context_window_size": 8192,
+   "sliding_window_size": -1,
+   "prefill_chunk_size": 2048,
+   "attention_sink_size": -1,
+   "tensor_parallel_shards": 1,
+   "pipeline_parallel_stages": 1,
+   "temperature": 1.0,
+   "presence_penalty": 0.0,
+   "frequency_penalty": 0.0,
+   "repetition_penalty": 1.0,
+   "top_p": 1.0,
+   "tokenizer_files": [
+     "tokenizer.model",
+     "tokenizer.json",
+     "tokenizer_config.json"
+   ],
+   "tokenizer_info": {
+     "token_postproc_method": "byte_fallback",
+     "prepend_space_in_encode": false,
+     "strip_space_in_decode": false
+   },
+   "conv_template": {
+     "name": "chatml",
+     "system_template": "<|im_start|>system\n{system_message}<|im_end|>\n",
+     "system_message": "A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.",
+     "system_prefix_token_ids": null,
+     "add_role_after_system_message": true,
+     "roles": {
+       "user": "<|im_start|>user",
+       "assistant": "<|im_start|>assistant"
+     },
+     "role_templates": {
+       "user": "{user_message}",
+       "assistant": "{assistant_message}",
+       "tool": "{tool_message}"
+     },
+     "messages": [],
+     "seps": [
+       "<|im_end|>\n"
+     ],
+     "role_content_sep": "\n",
+     "role_empty_sep": "\n",
+     "stop_str": [
+       "<|im_end|>"
+     ],
+     "stop_token_ids": [
+       2
+     ],
+     "function_string": "",
+     "use_function_calling": false
+   },
+   "pad_token_id": 0,
+   "bos_token_id": 1,
+   "eos_token_id": 2
+ }
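The `conv_template` block above fully determines how a chat turn is wrapped before it reaches the model. As a rough illustration of how those fields fit together (a plain-Python sketch of the template fields, not the MLC runtime's actual prompt-assembly code):

```python
# Sketch: assemble a single-turn ChatML prompt from the "conv_template" fields above.
conv = {
    "system_template": "<|im_start|>system\n{system_message}<|im_end|>\n",
    "system_message": (
        "A conversation between a user and an LLM-based AI assistant. "
        "The assistant gives helpful and honest answers."
    ),
    "roles": {"user": "<|im_start|>user", "assistant": "<|im_start|>assistant"},
    "seps": ["<|im_end|>\n"],
    "role_content_sep": "\n",
    "role_empty_sep": "\n",
}


def build_prompt(user_message: str) -> str:
    sep = conv["seps"][0]
    prompt = conv["system_template"].format(system_message=conv["system_message"])
    prompt += conv["roles"]["user"] + conv["role_content_sep"] + user_message + sep
    # An empty assistant turn cues the model to start generating.
    prompt += conv["roles"]["assistant"] + conv["role_empty_sep"]
    return prompt


print(build_prompt("What does q4f16_1 mean?"))
```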
model.wasm ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:94ad803872cdda3dc2db3f57c4c3d9e4928618af182db2131bfa0ec002121dee
+ size 4362680
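The binary files in this commit are stored as Git LFS pointers, so only the expected digest and size appear in the diff. A short sketch for verifying a downloaded copy against the pointer above (the local path is hypothetical):

```python
import hashlib
import os

path = "model.wasm"  # hypothetical local download location
expected_sha256 = "94ad803872cdda3dc2db3f57c4c3d9e4928618af182db2131bfa0ec002121dee"
expected_size = 4362680

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert digest.hexdigest() == expected_sha256, "sha256 mismatch"
print("model.wasm matches its LFS pointer")
```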
ndarray-cache.json ADDED
@@ -0,0 +1,1871 @@
+ {
+   "metadata": {
+     "ParamSize": 165,
+     "ParamBytes": 288967680.0,
+     "BitsPerParam": 4.50113497363691
+   },
+   "records": [
+     {
+       "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 24576000,
+       "records": [
+         { "name": "model.embed_tokens.q_weight", "shape": [32000, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 24576000, "byteOffset": 0 }
+       ],
+       "md5sum": "633e77cd70c09a1157f8c90051b657a9"
+     },
+     {
+       "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 30723072,
+       "records": [
+         { "name": "lm_head.q_weight", "shape": [32000, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 24576000, "byteOffset": 0 },
+         { "name": "lm_head.q_scale", "shape": [32000, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072000, "byteOffset": 24576000 },
+         { "name": "model.embed_tokens.q_scale", "shape": [32000, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072000, "byteOffset": 27648000 },
+         { "name": "model.layers.0.input_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 30720000 }
+       ],
+       "md5sum": "dca9dd04b8518413856ec25481de4e12"
+     },
+     {
+       "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 32747520,
+       "records": [
+         { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [1536, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 0 },
+         { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [1536, 128], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 3145728 },
+         { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [8192, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 3538944 },
+         { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [8192, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 9830400 },
+         { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 10616832 },
+         { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [3072, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 10619904 },
+         { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [3072, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 12979200 },
+         { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [1536, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 13274112 },
+         { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [1536, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 14453760 },
+         { "name": "model.layers.1.input_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 14601216 },
+         { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [1536, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14604288 },
+         { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [1536, 128], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 17750016 },
+         { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [8192, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18143232 },
+         { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [8192, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 24434688 },
+         { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25221120 },
+         { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [3072, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 25224192 },
+         { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [3072, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 27583488 },
+         { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [1536, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 27878400 },
+         { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [1536, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 29058048 },
+         { "name": "model.layers.10.input_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 29205504 },
+         { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [1536, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29208576 },
+         { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [1536, 128], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 32354304 }
+       ],
+       "md5sum": "acde503c622856d6448b2219453a9b11"
+     },
+     {
+       "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29208576,
+       "records": [
+         { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [8192, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 },
+         { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [8192, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 },
+         { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 7077888 },
+         { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [3072, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7080960 },
+         { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [3072, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9440256 },
+         { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [1536, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9735168 },
+         { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [1536, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10914816 },
+         { "name": "model.layers.11.input_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11062272 },
+         { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [1536, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11065344 },
+         { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [1536, 128], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 14211072 },
+         { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [8192, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14604288 },
+         { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [8192, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20895744 },
+         { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 21682176 },
+         { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [3072, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21685248 },
+         { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [3072, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 24044544 },
+         { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [1536, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24339456 },
+         { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [1536, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25519104 },
+         { "name": "model.layers.12.input_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25666560 },
+         { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [1536, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 25669632 },
+         { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [1536, 128], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 28815360 }
+       ],
+       "md5sum": "a9b819d6d1241df4d18fc34f3cd297ce"
+     },
+     {
+       "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 29208576,
+       "records": [
+         { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [8192, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 },
+         { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [8192, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 },
+         { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 7077888 },
+         { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [3072, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7080960 },
+         { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [3072, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9440256 },
+         { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [1536, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9735168 },
+         { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [1536, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10914816 },
+         { "name": "model.layers.13.input_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11062272 },
+         { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [1536, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11065344 },
+         { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [1536, 128], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 14211072 },
+         { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [8192, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14604288 },
+         { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [8192, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20895744 },
+         { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 21682176 },
+         { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [3072, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21685248 },
+         { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [3072, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 24044544 },
+         { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [1536, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24339456 },
+         { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [1536, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25519104 },
+         { "name": "model.layers.14.input_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25666560 },
+         { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [1536, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 25669632 },
+         { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [1536, 128], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 28815360 }
+       ],
+       "md5sum": "8f1c9f6da63d1f79b9405093c969ee85"
+     },
+     {
+       "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29208576,
+       "records": [
+         { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [8192, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 },
+         { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [8192, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 },
+         { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 7077888 },
+         { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [3072, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7080960 },
+         { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [3072, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9440256 },
+         { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [1536, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9735168 },
+         { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [1536, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10914816 },
+         { "name": "model.layers.15.input_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11062272 },
+         { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [1536, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11065344 },
+         { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [1536, 128], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 14211072 },
+         { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [8192, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14604288 },
+         { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [8192, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20895744 },
+         { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 21682176 },
+         { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [3072, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21685248 },
+         { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [3072, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 24044544 },
+         { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [1536, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24339456 },
+         { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [1536, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25519104 },
+         { "name": "model.layers.2.input_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25666560 },
+         { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [1536, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 25669632 },
+         { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [1536, 128], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 28815360 }
+       ],
+       "md5sum": "ac5aea0e00011b11cc74943af756baa2"
+     },
+     {
+       "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29208576,
+       "records": [
+         { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [8192, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 },
+         { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [8192, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 },
+         { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 7077888 },
+         { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [3072, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7080960 },
+         { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [3072, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9440256 },
+         { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [1536, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9735168 },
+         { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [1536, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10914816 },
+         { "name": "model.layers.3.input_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11062272 },
+         { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [1536, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11065344 },
+         { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [1536, 128], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 14211072 },
+         { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [8192, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14604288 },
+         { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [8192, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20895744 },
+         { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 21682176 },
+         { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [3072, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21685248 },
+         { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [3072, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 24044544 },
+         { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [1536, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24339456 },
+         { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [1536, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25519104 },
+         { "name": "model.layers.4.input_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25666560 },
+         { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [1536, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 25669632 },
+         { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [1536, 128], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 28815360 }
+       ],
+       "md5sum": "7a7cfa6e4a6a344163a815a0a3b85009"
+     },
+     {
+       "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 29208576,
+       "records": [
+         { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [8192, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 },
+         { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [8192, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 },
+         { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 7077888 },
+         { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [3072, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7080960 },
+         { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [3072, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9440256 },
+         { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [1536, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9735168 },
+         { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [1536, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10914816 },
+         { "name": "model.layers.5.input_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11062272 },
+         { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [1536, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11065344 },
+         { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [1536, 128], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 14211072 },
+         { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [8192, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14604288 },
+         { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [8192, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20895744 },
+         { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 21682176 },
+         { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [3072, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21685248 },
+         { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [3072, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 24044544 },
+         { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [1536, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24339456 },
+         { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [1536, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25519104 },
+         { "name": "model.layers.6.input_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25666560 },
+         { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [1536, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 25669632 },
+         { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [1536, 128], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 28815360 }
+       ],
+       "md5sum": "43b8567c231fcd6ceb9d289e2a5e74c1"
+     },
+     {
+       "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29208576,
+       "records": [
+         { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [8192, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 },
+         { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [8192, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 },
+         { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 7077888 },
+         { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [3072, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7080960 },
+         { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [3072, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9440256 },
+         { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [1536, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9735168 },
+         { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [1536, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10914816 },
+         { "name": "model.layers.7.input_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11062272 },
+         { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [1536, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11065344 },
+         { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [1536, 128], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 14211072 },
+         { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [8192, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14604288 },
+         { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [8192, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20895744 },
+         { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 21682176 },
+         { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [3072, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21685248 },
+         { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [3072, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 24044544 },
+         { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [1536, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24339456 },
+         { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [1536, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25519104 },
+         { "name": "model.layers.8.input_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25666560 },
+         { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [1536, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 25669632 },
+         { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [1536, 128], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 28815360 }
+       ],
+       "md5sum": "42f4a095eccd005499430d3995f1dd3f"
+     },
+     {
+       "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 25669632,
+       "records": [
+         { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [8192, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 },
+         { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [8192, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 6291456 },
+         { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 7077888 },
+         { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [3072, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7080960 },
+         { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [3072, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9440256 },
+         { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [1536, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9735168 },
+         { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [1536, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10914816 },
+         { "name": "model.layers.9.input_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11062272 },
+         { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [1536, 512], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11065344 },
+         { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [1536, 128], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 14211072 },
+         { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [8192, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 14604288 },
+         { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [8192, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 20895744 },
+         { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 21682176 },
+         { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [3072, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21685248 },
+         { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [3072, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 24044544 },
+         { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [1536, 192], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24339456 },
+         { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [1536, 48], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25519104 },
+         { "name": "model.norm.weight", "shape": [1536], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 25666560 }
+       ],
+       "md5sum": "8d3a7724603c587b5c168bb4500903c0"
+     }
+   ]
+ }
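The shard records follow directly from the q4f16_1 layout implied by the shapes above: each `q_weight` tensor stores eight 4-bit values per `uint32` word, and each `q_scale` tensor holds one `float16` scale per group of 32 weights (both factors are inferred from the shapes, not taken from MLC source). A small sanity-check sketch of that arithmetic:

```python
import math


def packed_nbytes(shape, dtype):
    """Byte size of a tensor given its shape and storage dtype."""
    itemsize = {"uint32": 4, "float16": 2}[dtype]
    return math.prod(shape) * itemsize


# model.embed_tokens is originally [32000, 1536]: 1536 / 8 = 192 uint32 words per row,
# and 1536 / 32 = 48 scale groups per row.
assert packed_nbytes([32000, 192], "uint32") == 24_576_000  # matches params_shard_0
assert packed_nbytes([32000, 48], "float16") == 3_072_000   # matches the q_scale record

# ParamBytes in the metadata is the total storage across all shards; BitsPerParam is
# presumably ParamBytes * 8 divided by the original parameter count.
param_bytes = 288_967_680
print(f"total parameter storage: {param_bytes / 2**20:.1f} MiB")
```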
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:079b8dab0ea7ca80117f2f8d9aa271801251b11e1dc267f14d6e0f6489b94c6f
+ size 24576000
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:acb8faaea3cbf53cb689cba9e91447af6cb6679d8bc7caf8547a74b2b343f587
+ size 30723072
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:38cc5ffed5ff9f2a50d6aed5de3aec7c37b7dff7d9cfcb16fbf65a2cda10d8af
+ size 32747520
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:554e2f64ba9d61874859234af794165047262da19ec34b09f55cee41837f5daa
+ size 29208576
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2daeb1818a4de7fc58d7df9c4e70aae80a0cf141354c2e489b47a57befca1f40
+ size 29208576
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ab9bc7b485cf696d27e3377f71101ce3c22883a236cdb3aee9f3e1e3ad8b2bb1
+ size 29208576
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d09bd1e7b94dcd9406cd0d59ee93bb3fe6dd38df7ceb0275843ae999e565debe
+ size 29208576
params_shard_7.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6a5091a2856725a4d182d158fa5f87d0c4301c8a32a52d7deb73670794ae73be
+ size 29208576
params_shard_8.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:24e153439263c4d94384ccafb42df3800b5ac6d49bb0f86ab458e6d5328cb70d
+ size 29208576
params_shard_9.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a1a03be05ac6a4578a1eb79e78337dd194fc820a0490d8e05d3237ceea318b6e
+ size 25669632
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,46 @@
+ {
+   "add_bos_token": false,
+   "add_eos_token": false,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "bos_token": "<s>",
+   "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% if ((message['role'] == 'user') != (loop.index0 % 2 == 0)) or ((message['role'] == 'assistant') != (loop.index0 % 2 == 1)) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '<|prompt|>' + message['content'].strip() + eos_token }}{% elif message['role'] == 'assistant' %}{{ '<|answer|>' + message['content'].strip() + eos_token }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|answer|>' }}{% endif %}",
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "</s>",
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<unk>",
+   "sep_token": "</s>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
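Note that this `chat_template` carries the base h2o-danube3 format (`<|prompt|>` / `<|answer|>`), while the MLC runtime applies the ChatML `conv_template` from `mlc-chat-config.json` instead. To see what the Hugging Face template renders, here is a hedged sketch with `transformers`; the local path is a placeholder for a checkout of this repository (or of the upstream chat model):

```python
from transformers import AutoTokenizer

# Placeholder path: a local clone of this repository or of h2oai/h2o-danube3-500m-chat.
tokenizer = AutoTokenizer.from_pretrained("./h2o-danube3-500m-chat-MLC")

prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Why is the sky blue?"}],
    tokenize=False,
    add_generation_prompt=True,
)
print(prompt)  # expected: "<|prompt|>Why is the sky blue?</s><|answer|>"
```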