Felladrin commited on
Commit
c7046a9
1 Parent(s): 7e5f0f5

Upload folder using huggingface_hub

Browse files
mlc-chat-config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "llama",
3
+ "quantization": "q4f32_1",
4
+ "model_config": {
5
+ "hidden_size": 768,
6
+ "intermediate_size": 3072,
7
+ "num_attention_heads": 24,
8
+ "num_hidden_layers": 6,
9
+ "rms_norm_eps": 1e-05,
10
+ "vocab_size": 32128,
11
+ "position_embedding_base": 10000.0,
12
+ "context_window_size": 1024,
13
+ "prefill_chunk_size": 1024,
14
+ "num_key_value_heads": 8,
15
+ "head_dim": 32,
16
+ "tensor_parallel_shards": 1
17
+ },
18
+ "vocab_size": 32128,
19
+ "context_window_size": 1024,
20
+ "sliding_window_size": -1,
21
+ "prefill_chunk_size": 1024,
22
+ "attention_sink_size": -1,
23
+ "tensor_parallel_shards": 1,
24
+ "mean_gen_len": 128,
25
+ "max_gen_len": 512,
26
+ "shift_fill_factor": 0.3,
27
+ "temperature": 0.7,
28
+ "repetition_penalty": 1.0,
29
+ "top_p": 0.95,
30
+ "conv_template": "chatml",
31
+ "pad_token_id": 2,
32
+ "bos_token_id": 1,
33
+ "eos_token_id": 2,
34
+ "tokenizer_files": [
35
+ "tokenizer.model",
36
+ "tokenizer.json",
37
+ "tokenizer_config.json"
38
+ ],
39
+ "version": "0.1.0"
40
+ }
model.wasm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e17430fcf959594517bc5f27328952892ea184e35ff70879869a77245ebc02f
3
+ size 2041222
ndarray-cache-b16.json ADDED
@@ -0,0 +1,727 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 65,
4
+ "ParamBytes": 63323136.0,
5
+ "BitsPerParam": 5.002662055470865
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 32629248,
12
+ "records": [
13
+ {
14
+ "name": "lm_head.q_weight",
15
+ "shape": [
16
+ 32128,
17
+ 96
18
+ ],
19
+ "dtype": "uint32",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 12337152,
22
+ "byteOffset": 0
23
+ },
24
+ {
25
+ "name": "lm_head.q_scale",
26
+ "shape": [
27
+ 32128,
28
+ 24
29
+ ],
30
+ "dtype": "bfloat16",
31
+ "format": "raw",
32
+ "nbytes": 1542144,
33
+ "byteOffset": 12337152
34
+ },
35
+ {
36
+ "name": "model.embed_tokens.q_weight",
37
+ "shape": [
38
+ 32128,
39
+ 96
40
+ ],
41
+ "dtype": "uint32",
42
+ "format": "f32-to-bf16",
43
+ "nbytes": 12337152,
44
+ "byteOffset": 13879296
45
+ },
46
+ {
47
+ "name": "model.embed_tokens.q_scale",
48
+ "shape": [
49
+ 32128,
50
+ 24
51
+ ],
52
+ "dtype": "bfloat16",
53
+ "format": "raw",
54
+ "nbytes": 1542144,
55
+ "byteOffset": 26216448
56
+ },
57
+ {
58
+ "name": "model.layers.0.input_layernorm.weight",
59
+ "shape": [
60
+ 768
61
+ ],
62
+ "dtype": "bfloat16",
63
+ "format": "raw",
64
+ "nbytes": 1536,
65
+ "byteOffset": 27758592
66
+ },
67
+ {
68
+ "name": "model.layers.0.mlp.down_proj.q_weight",
69
+ "shape": [
70
+ 768,
71
+ 384
72
+ ],
73
+ "dtype": "uint32",
74
+ "format": "f32-to-bf16",
75
+ "nbytes": 1179648,
76
+ "byteOffset": 27760128
77
+ },
78
+ {
79
+ "name": "model.layers.0.mlp.down_proj.q_scale",
80
+ "shape": [
81
+ 768,
82
+ 96
83
+ ],
84
+ "dtype": "bfloat16",
85
+ "format": "raw",
86
+ "nbytes": 147456,
87
+ "byteOffset": 28939776
88
+ },
89
+ {
90
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
91
+ "shape": [
92
+ 6144,
93
+ 96
94
+ ],
95
+ "dtype": "uint32",
96
+ "format": "f32-to-bf16",
97
+ "nbytes": 2359296,
98
+ "byteOffset": 29087232
99
+ },
100
+ {
101
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
102
+ "shape": [
103
+ 6144,
104
+ 24
105
+ ],
106
+ "dtype": "bfloat16",
107
+ "format": "raw",
108
+ "nbytes": 294912,
109
+ "byteOffset": 31446528
110
+ },
111
+ {
112
+ "name": "model.layers.0.post_attention_layernorm.weight",
113
+ "shape": [
114
+ 768
115
+ ],
116
+ "dtype": "bfloat16",
117
+ "format": "raw",
118
+ "nbytes": 1536,
119
+ "byteOffset": 31741440
120
+ },
121
+ {
122
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
123
+ "shape": [
124
+ 1280,
125
+ 96
126
+ ],
127
+ "dtype": "uint32",
128
+ "format": "f32-to-bf16",
129
+ "nbytes": 491520,
130
+ "byteOffset": 31742976
131
+ },
132
+ {
133
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
134
+ "shape": [
135
+ 1280,
136
+ 24
137
+ ],
138
+ "dtype": "bfloat16",
139
+ "format": "raw",
140
+ "nbytes": 61440,
141
+ "byteOffset": 32234496
142
+ },
143
+ {
144
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
145
+ "shape": [
146
+ 768,
147
+ 96
148
+ ],
149
+ "dtype": "uint32",
150
+ "format": "f32-to-bf16",
151
+ "nbytes": 294912,
152
+ "byteOffset": 32295936
153
+ },
154
+ {
155
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
156
+ "shape": [
157
+ 768,
158
+ 24
159
+ ],
160
+ "dtype": "bfloat16",
161
+ "format": "raw",
162
+ "nbytes": 36864,
163
+ "byteOffset": 32590848
164
+ },
165
+ {
166
+ "name": "model.layers.1.input_layernorm.weight",
167
+ "shape": [
168
+ 768
169
+ ],
170
+ "dtype": "bfloat16",
171
+ "format": "raw",
172
+ "nbytes": 1536,
173
+ "byteOffset": 32627712
174
+ }
175
+ ],
176
+ "md5sum": "9d6b148e542ce32b3d03a9ab2f3061ed"
177
+ },
178
+ {
179
+ "dataPath": "params_shard_1.bin",
180
+ "format": "raw-shard",
181
+ "nbytes": 24345600,
182
+ "records": [
183
+ {
184
+ "name": "model.layers.1.mlp.down_proj.q_weight",
185
+ "shape": [
186
+ 768,
187
+ 384
188
+ ],
189
+ "dtype": "uint32",
190
+ "format": "f32-to-bf16",
191
+ "nbytes": 1179648,
192
+ "byteOffset": 0
193
+ },
194
+ {
195
+ "name": "model.layers.1.mlp.down_proj.q_scale",
196
+ "shape": [
197
+ 768,
198
+ 96
199
+ ],
200
+ "dtype": "bfloat16",
201
+ "format": "raw",
202
+ "nbytes": 147456,
203
+ "byteOffset": 1179648
204
+ },
205
+ {
206
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
207
+ "shape": [
208
+ 6144,
209
+ 96
210
+ ],
211
+ "dtype": "uint32",
212
+ "format": "f32-to-bf16",
213
+ "nbytes": 2359296,
214
+ "byteOffset": 1327104
215
+ },
216
+ {
217
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
218
+ "shape": [
219
+ 6144,
220
+ 24
221
+ ],
222
+ "dtype": "bfloat16",
223
+ "format": "raw",
224
+ "nbytes": 294912,
225
+ "byteOffset": 3686400
226
+ },
227
+ {
228
+ "name": "model.layers.1.post_attention_layernorm.weight",
229
+ "shape": [
230
+ 768
231
+ ],
232
+ "dtype": "bfloat16",
233
+ "format": "raw",
234
+ "nbytes": 1536,
235
+ "byteOffset": 3981312
236
+ },
237
+ {
238
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
239
+ "shape": [
240
+ 1280,
241
+ 96
242
+ ],
243
+ "dtype": "uint32",
244
+ "format": "f32-to-bf16",
245
+ "nbytes": 491520,
246
+ "byteOffset": 3982848
247
+ },
248
+ {
249
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
250
+ "shape": [
251
+ 1280,
252
+ 24
253
+ ],
254
+ "dtype": "bfloat16",
255
+ "format": "raw",
256
+ "nbytes": 61440,
257
+ "byteOffset": 4474368
258
+ },
259
+ {
260
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
261
+ "shape": [
262
+ 768,
263
+ 96
264
+ ],
265
+ "dtype": "uint32",
266
+ "format": "f32-to-bf16",
267
+ "nbytes": 294912,
268
+ "byteOffset": 4535808
269
+ },
270
+ {
271
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
272
+ "shape": [
273
+ 768,
274
+ 24
275
+ ],
276
+ "dtype": "bfloat16",
277
+ "format": "raw",
278
+ "nbytes": 36864,
279
+ "byteOffset": 4830720
280
+ },
281
+ {
282
+ "name": "model.layers.2.input_layernorm.weight",
283
+ "shape": [
284
+ 768
285
+ ],
286
+ "dtype": "bfloat16",
287
+ "format": "raw",
288
+ "nbytes": 1536,
289
+ "byteOffset": 4867584
290
+ },
291
+ {
292
+ "name": "model.layers.2.mlp.down_proj.q_weight",
293
+ "shape": [
294
+ 768,
295
+ 384
296
+ ],
297
+ "dtype": "uint32",
298
+ "format": "f32-to-bf16",
299
+ "nbytes": 1179648,
300
+ "byteOffset": 4869120
301
+ },
302
+ {
303
+ "name": "model.layers.2.mlp.down_proj.q_scale",
304
+ "shape": [
305
+ 768,
306
+ 96
307
+ ],
308
+ "dtype": "bfloat16",
309
+ "format": "raw",
310
+ "nbytes": 147456,
311
+ "byteOffset": 6048768
312
+ },
313
+ {
314
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
315
+ "shape": [
316
+ 6144,
317
+ 96
318
+ ],
319
+ "dtype": "uint32",
320
+ "format": "f32-to-bf16",
321
+ "nbytes": 2359296,
322
+ "byteOffset": 6196224
323
+ },
324
+ {
325
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
326
+ "shape": [
327
+ 6144,
328
+ 24
329
+ ],
330
+ "dtype": "bfloat16",
331
+ "format": "raw",
332
+ "nbytes": 294912,
333
+ "byteOffset": 8555520
334
+ },
335
+ {
336
+ "name": "model.layers.2.post_attention_layernorm.weight",
337
+ "shape": [
338
+ 768
339
+ ],
340
+ "dtype": "bfloat16",
341
+ "format": "raw",
342
+ "nbytes": 1536,
343
+ "byteOffset": 8850432
344
+ },
345
+ {
346
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
347
+ "shape": [
348
+ 1280,
349
+ 96
350
+ ],
351
+ "dtype": "uint32",
352
+ "format": "f32-to-bf16",
353
+ "nbytes": 491520,
354
+ "byteOffset": 8851968
355
+ },
356
+ {
357
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
358
+ "shape": [
359
+ 1280,
360
+ 24
361
+ ],
362
+ "dtype": "bfloat16",
363
+ "format": "raw",
364
+ "nbytes": 61440,
365
+ "byteOffset": 9343488
366
+ },
367
+ {
368
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
369
+ "shape": [
370
+ 768,
371
+ 96
372
+ ],
373
+ "dtype": "uint32",
374
+ "format": "f32-to-bf16",
375
+ "nbytes": 294912,
376
+ "byteOffset": 9404928
377
+ },
378
+ {
379
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
380
+ "shape": [
381
+ 768,
382
+ 24
383
+ ],
384
+ "dtype": "bfloat16",
385
+ "format": "raw",
386
+ "nbytes": 36864,
387
+ "byteOffset": 9699840
388
+ },
389
+ {
390
+ "name": "model.layers.3.input_layernorm.weight",
391
+ "shape": [
392
+ 768
393
+ ],
394
+ "dtype": "bfloat16",
395
+ "format": "raw",
396
+ "nbytes": 1536,
397
+ "byteOffset": 9736704
398
+ },
399
+ {
400
+ "name": "model.layers.3.mlp.down_proj.q_weight",
401
+ "shape": [
402
+ 768,
403
+ 384
404
+ ],
405
+ "dtype": "uint32",
406
+ "format": "f32-to-bf16",
407
+ "nbytes": 1179648,
408
+ "byteOffset": 9738240
409
+ },
410
+ {
411
+ "name": "model.layers.3.mlp.down_proj.q_scale",
412
+ "shape": [
413
+ 768,
414
+ 96
415
+ ],
416
+ "dtype": "bfloat16",
417
+ "format": "raw",
418
+ "nbytes": 147456,
419
+ "byteOffset": 10917888
420
+ },
421
+ {
422
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
423
+ "shape": [
424
+ 6144,
425
+ 96
426
+ ],
427
+ "dtype": "uint32",
428
+ "format": "f32-to-bf16",
429
+ "nbytes": 2359296,
430
+ "byteOffset": 11065344
431
+ },
432
+ {
433
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
434
+ "shape": [
435
+ 6144,
436
+ 24
437
+ ],
438
+ "dtype": "bfloat16",
439
+ "format": "raw",
440
+ "nbytes": 294912,
441
+ "byteOffset": 13424640
442
+ },
443
+ {
444
+ "name": "model.layers.3.post_attention_layernorm.weight",
445
+ "shape": [
446
+ 768
447
+ ],
448
+ "dtype": "bfloat16",
449
+ "format": "raw",
450
+ "nbytes": 1536,
451
+ "byteOffset": 13719552
452
+ },
453
+ {
454
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
455
+ "shape": [
456
+ 1280,
457
+ 96
458
+ ],
459
+ "dtype": "uint32",
460
+ "format": "f32-to-bf16",
461
+ "nbytes": 491520,
462
+ "byteOffset": 13721088
463
+ },
464
+ {
465
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
466
+ "shape": [
467
+ 1280,
468
+ 24
469
+ ],
470
+ "dtype": "bfloat16",
471
+ "format": "raw",
472
+ "nbytes": 61440,
473
+ "byteOffset": 14212608
474
+ },
475
+ {
476
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
477
+ "shape": [
478
+ 768,
479
+ 96
480
+ ],
481
+ "dtype": "uint32",
482
+ "format": "f32-to-bf16",
483
+ "nbytes": 294912,
484
+ "byteOffset": 14274048
485
+ },
486
+ {
487
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
488
+ "shape": [
489
+ 768,
490
+ 24
491
+ ],
492
+ "dtype": "bfloat16",
493
+ "format": "raw",
494
+ "nbytes": 36864,
495
+ "byteOffset": 14568960
496
+ },
497
+ {
498
+ "name": "model.layers.4.input_layernorm.weight",
499
+ "shape": [
500
+ 768
501
+ ],
502
+ "dtype": "bfloat16",
503
+ "format": "raw",
504
+ "nbytes": 1536,
505
+ "byteOffset": 14605824
506
+ },
507
+ {
508
+ "name": "model.layers.4.mlp.down_proj.q_weight",
509
+ "shape": [
510
+ 768,
511
+ 384
512
+ ],
513
+ "dtype": "uint32",
514
+ "format": "f32-to-bf16",
515
+ "nbytes": 1179648,
516
+ "byteOffset": 14607360
517
+ },
518
+ {
519
+ "name": "model.layers.4.mlp.down_proj.q_scale",
520
+ "shape": [
521
+ 768,
522
+ 96
523
+ ],
524
+ "dtype": "bfloat16",
525
+ "format": "raw",
526
+ "nbytes": 147456,
527
+ "byteOffset": 15787008
528
+ },
529
+ {
530
+ "name": "model.layers.4.mlp.gate_up_proj.q_weight",
531
+ "shape": [
532
+ 6144,
533
+ 96
534
+ ],
535
+ "dtype": "uint32",
536
+ "format": "f32-to-bf16",
537
+ "nbytes": 2359296,
538
+ "byteOffset": 15934464
539
+ },
540
+ {
541
+ "name": "model.layers.4.mlp.gate_up_proj.q_scale",
542
+ "shape": [
543
+ 6144,
544
+ 24
545
+ ],
546
+ "dtype": "bfloat16",
547
+ "format": "raw",
548
+ "nbytes": 294912,
549
+ "byteOffset": 18293760
550
+ },
551
+ {
552
+ "name": "model.layers.4.post_attention_layernorm.weight",
553
+ "shape": [
554
+ 768
555
+ ],
556
+ "dtype": "bfloat16",
557
+ "format": "raw",
558
+ "nbytes": 1536,
559
+ "byteOffset": 18588672
560
+ },
561
+ {
562
+ "name": "model.layers.4.self_attn.qkv_proj.q_weight",
563
+ "shape": [
564
+ 1280,
565
+ 96
566
+ ],
567
+ "dtype": "uint32",
568
+ "format": "f32-to-bf16",
569
+ "nbytes": 491520,
570
+ "byteOffset": 18590208
571
+ },
572
+ {
573
+ "name": "model.layers.4.self_attn.qkv_proj.q_scale",
574
+ "shape": [
575
+ 1280,
576
+ 24
577
+ ],
578
+ "dtype": "bfloat16",
579
+ "format": "raw",
580
+ "nbytes": 61440,
581
+ "byteOffset": 19081728
582
+ },
583
+ {
584
+ "name": "model.layers.4.self_attn.o_proj.q_weight",
585
+ "shape": [
586
+ 768,
587
+ 96
588
+ ],
589
+ "dtype": "uint32",
590
+ "format": "f32-to-bf16",
591
+ "nbytes": 294912,
592
+ "byteOffset": 19143168
593
+ },
594
+ {
595
+ "name": "model.layers.4.self_attn.o_proj.q_scale",
596
+ "shape": [
597
+ 768,
598
+ 24
599
+ ],
600
+ "dtype": "bfloat16",
601
+ "format": "raw",
602
+ "nbytes": 36864,
603
+ "byteOffset": 19438080
604
+ },
605
+ {
606
+ "name": "model.layers.5.input_layernorm.weight",
607
+ "shape": [
608
+ 768
609
+ ],
610
+ "dtype": "bfloat16",
611
+ "format": "raw",
612
+ "nbytes": 1536,
613
+ "byteOffset": 19474944
614
+ },
615
+ {
616
+ "name": "model.layers.5.mlp.down_proj.q_weight",
617
+ "shape": [
618
+ 768,
619
+ 384
620
+ ],
621
+ "dtype": "uint32",
622
+ "format": "f32-to-bf16",
623
+ "nbytes": 1179648,
624
+ "byteOffset": 19476480
625
+ },
626
+ {
627
+ "name": "model.layers.5.mlp.down_proj.q_scale",
628
+ "shape": [
629
+ 768,
630
+ 96
631
+ ],
632
+ "dtype": "bfloat16",
633
+ "format": "raw",
634
+ "nbytes": 147456,
635
+ "byteOffset": 20656128
636
+ },
637
+ {
638
+ "name": "model.layers.5.mlp.gate_up_proj.q_weight",
639
+ "shape": [
640
+ 6144,
641
+ 96
642
+ ],
643
+ "dtype": "uint32",
644
+ "format": "f32-to-bf16",
645
+ "nbytes": 2359296,
646
+ "byteOffset": 20803584
647
+ },
648
+ {
649
+ "name": "model.layers.5.mlp.gate_up_proj.q_scale",
650
+ "shape": [
651
+ 6144,
652
+ 24
653
+ ],
654
+ "dtype": "bfloat16",
655
+ "format": "raw",
656
+ "nbytes": 294912,
657
+ "byteOffset": 23162880
658
+ },
659
+ {
660
+ "name": "model.layers.5.post_attention_layernorm.weight",
661
+ "shape": [
662
+ 768
663
+ ],
664
+ "dtype": "bfloat16",
665
+ "format": "raw",
666
+ "nbytes": 1536,
667
+ "byteOffset": 23457792
668
+ },
669
+ {
670
+ "name": "model.layers.5.self_attn.qkv_proj.q_weight",
671
+ "shape": [
672
+ 1280,
673
+ 96
674
+ ],
675
+ "dtype": "uint32",
676
+ "format": "f32-to-bf16",
677
+ "nbytes": 491520,
678
+ "byteOffset": 23459328
679
+ },
680
+ {
681
+ "name": "model.layers.5.self_attn.qkv_proj.q_scale",
682
+ "shape": [
683
+ 1280,
684
+ 24
685
+ ],
686
+ "dtype": "bfloat16",
687
+ "format": "raw",
688
+ "nbytes": 61440,
689
+ "byteOffset": 23950848
690
+ },
691
+ {
692
+ "name": "model.layers.5.self_attn.o_proj.q_weight",
693
+ "shape": [
694
+ 768,
695
+ 96
696
+ ],
697
+ "dtype": "uint32",
698
+ "format": "f32-to-bf16",
699
+ "nbytes": 294912,
700
+ "byteOffset": 24012288
701
+ },
702
+ {
703
+ "name": "model.layers.5.self_attn.o_proj.q_scale",
704
+ "shape": [
705
+ 768,
706
+ 24
707
+ ],
708
+ "dtype": "bfloat16",
709
+ "format": "raw",
710
+ "nbytes": 36864,
711
+ "byteOffset": 24307200
712
+ },
713
+ {
714
+ "name": "model.norm.weight",
715
+ "shape": [
716
+ 768
717
+ ],
718
+ "dtype": "bfloat16",
719
+ "format": "raw",
720
+ "nbytes": 1536,
721
+ "byteOffset": 24344064
722
+ }
723
+ ],
724
+ "md5sum": "0de23251ab4684bf693a425bc20ab403"
725
+ }
726
+ ]
727
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,727 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 65,
4
+ "ParamBytes": 63323136.0,
5
+ "BitsPerParam": 5.002662055470865
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 32629248,
12
+ "records": [
13
+ {
14
+ "name": "lm_head.q_weight",
15
+ "shape": [
16
+ 32128,
17
+ 96
18
+ ],
19
+ "dtype": "uint32",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 12337152,
22
+ "byteOffset": 0
23
+ },
24
+ {
25
+ "name": "lm_head.q_scale",
26
+ "shape": [
27
+ 32128,
28
+ 24
29
+ ],
30
+ "dtype": "float32",
31
+ "format": "f32-to-bf16",
32
+ "nbytes": 1542144,
33
+ "byteOffset": 12337152
34
+ },
35
+ {
36
+ "name": "model.embed_tokens.q_weight",
37
+ "shape": [
38
+ 32128,
39
+ 96
40
+ ],
41
+ "dtype": "uint32",
42
+ "format": "f32-to-bf16",
43
+ "nbytes": 12337152,
44
+ "byteOffset": 13879296
45
+ },
46
+ {
47
+ "name": "model.embed_tokens.q_scale",
48
+ "shape": [
49
+ 32128,
50
+ 24
51
+ ],
52
+ "dtype": "float32",
53
+ "format": "f32-to-bf16",
54
+ "nbytes": 1542144,
55
+ "byteOffset": 26216448
56
+ },
57
+ {
58
+ "name": "model.layers.0.input_layernorm.weight",
59
+ "shape": [
60
+ 768
61
+ ],
62
+ "dtype": "float32",
63
+ "format": "f32-to-bf16",
64
+ "nbytes": 1536,
65
+ "byteOffset": 27758592
66
+ },
67
+ {
68
+ "name": "model.layers.0.mlp.down_proj.q_weight",
69
+ "shape": [
70
+ 768,
71
+ 384
72
+ ],
73
+ "dtype": "uint32",
74
+ "format": "f32-to-bf16",
75
+ "nbytes": 1179648,
76
+ "byteOffset": 27760128
77
+ },
78
+ {
79
+ "name": "model.layers.0.mlp.down_proj.q_scale",
80
+ "shape": [
81
+ 768,
82
+ 96
83
+ ],
84
+ "dtype": "float32",
85
+ "format": "f32-to-bf16",
86
+ "nbytes": 147456,
87
+ "byteOffset": 28939776
88
+ },
89
+ {
90
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
91
+ "shape": [
92
+ 6144,
93
+ 96
94
+ ],
95
+ "dtype": "uint32",
96
+ "format": "f32-to-bf16",
97
+ "nbytes": 2359296,
98
+ "byteOffset": 29087232
99
+ },
100
+ {
101
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
102
+ "shape": [
103
+ 6144,
104
+ 24
105
+ ],
106
+ "dtype": "float32",
107
+ "format": "f32-to-bf16",
108
+ "nbytes": 294912,
109
+ "byteOffset": 31446528
110
+ },
111
+ {
112
+ "name": "model.layers.0.post_attention_layernorm.weight",
113
+ "shape": [
114
+ 768
115
+ ],
116
+ "dtype": "float32",
117
+ "format": "f32-to-bf16",
118
+ "nbytes": 1536,
119
+ "byteOffset": 31741440
120
+ },
121
+ {
122
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
123
+ "shape": [
124
+ 1280,
125
+ 96
126
+ ],
127
+ "dtype": "uint32",
128
+ "format": "f32-to-bf16",
129
+ "nbytes": 491520,
130
+ "byteOffset": 31742976
131
+ },
132
+ {
133
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
134
+ "shape": [
135
+ 1280,
136
+ 24
137
+ ],
138
+ "dtype": "float32",
139
+ "format": "f32-to-bf16",
140
+ "nbytes": 61440,
141
+ "byteOffset": 32234496
142
+ },
143
+ {
144
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
145
+ "shape": [
146
+ 768,
147
+ 96
148
+ ],
149
+ "dtype": "uint32",
150
+ "format": "f32-to-bf16",
151
+ "nbytes": 294912,
152
+ "byteOffset": 32295936
153
+ },
154
+ {
155
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
156
+ "shape": [
157
+ 768,
158
+ 24
159
+ ],
160
+ "dtype": "float32",
161
+ "format": "f32-to-bf16",
162
+ "nbytes": 36864,
163
+ "byteOffset": 32590848
164
+ },
165
+ {
166
+ "name": "model.layers.1.input_layernorm.weight",
167
+ "shape": [
168
+ 768
169
+ ],
170
+ "dtype": "float32",
171
+ "format": "f32-to-bf16",
172
+ "nbytes": 1536,
173
+ "byteOffset": 32627712
174
+ }
175
+ ],
176
+ "md5sum": "9d6b148e542ce32b3d03a9ab2f3061ed"
177
+ },
178
+ {
179
+ "dataPath": "params_shard_1.bin",
180
+ "format": "raw-shard",
181
+ "nbytes": 24345600,
182
+ "records": [
183
+ {
184
+ "name": "model.layers.1.mlp.down_proj.q_weight",
185
+ "shape": [
186
+ 768,
187
+ 384
188
+ ],
189
+ "dtype": "uint32",
190
+ "format": "f32-to-bf16",
191
+ "nbytes": 1179648,
192
+ "byteOffset": 0
193
+ },
194
+ {
195
+ "name": "model.layers.1.mlp.down_proj.q_scale",
196
+ "shape": [
197
+ 768,
198
+ 96
199
+ ],
200
+ "dtype": "float32",
201
+ "format": "f32-to-bf16",
202
+ "nbytes": 147456,
203
+ "byteOffset": 1179648
204
+ },
205
+ {
206
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
207
+ "shape": [
208
+ 6144,
209
+ 96
210
+ ],
211
+ "dtype": "uint32",
212
+ "format": "f32-to-bf16",
213
+ "nbytes": 2359296,
214
+ "byteOffset": 1327104
215
+ },
216
+ {
217
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
218
+ "shape": [
219
+ 6144,
220
+ 24
221
+ ],
222
+ "dtype": "float32",
223
+ "format": "f32-to-bf16",
224
+ "nbytes": 294912,
225
+ "byteOffset": 3686400
226
+ },
227
+ {
228
+ "name": "model.layers.1.post_attention_layernorm.weight",
229
+ "shape": [
230
+ 768
231
+ ],
232
+ "dtype": "float32",
233
+ "format": "f32-to-bf16",
234
+ "nbytes": 1536,
235
+ "byteOffset": 3981312
236
+ },
237
+ {
238
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
239
+ "shape": [
240
+ 1280,
241
+ 96
242
+ ],
243
+ "dtype": "uint32",
244
+ "format": "f32-to-bf16",
245
+ "nbytes": 491520,
246
+ "byteOffset": 3982848
247
+ },
248
+ {
249
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
250
+ "shape": [
251
+ 1280,
252
+ 24
253
+ ],
254
+ "dtype": "float32",
255
+ "format": "f32-to-bf16",
256
+ "nbytes": 61440,
257
+ "byteOffset": 4474368
258
+ },
259
+ {
260
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
261
+ "shape": [
262
+ 768,
263
+ 96
264
+ ],
265
+ "dtype": "uint32",
266
+ "format": "f32-to-bf16",
267
+ "nbytes": 294912,
268
+ "byteOffset": 4535808
269
+ },
270
+ {
271
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
272
+ "shape": [
273
+ 768,
274
+ 24
275
+ ],
276
+ "dtype": "float32",
277
+ "format": "f32-to-bf16",
278
+ "nbytes": 36864,
279
+ "byteOffset": 4830720
280
+ },
281
+ {
282
+ "name": "model.layers.2.input_layernorm.weight",
283
+ "shape": [
284
+ 768
285
+ ],
286
+ "dtype": "float32",
287
+ "format": "f32-to-bf16",
288
+ "nbytes": 1536,
289
+ "byteOffset": 4867584
290
+ },
291
+ {
292
+ "name": "model.layers.2.mlp.down_proj.q_weight",
293
+ "shape": [
294
+ 768,
295
+ 384
296
+ ],
297
+ "dtype": "uint32",
298
+ "format": "f32-to-bf16",
299
+ "nbytes": 1179648,
300
+ "byteOffset": 4869120
301
+ },
302
+ {
303
+ "name": "model.layers.2.mlp.down_proj.q_scale",
304
+ "shape": [
305
+ 768,
306
+ 96
307
+ ],
308
+ "dtype": "float32",
309
+ "format": "f32-to-bf16",
310
+ "nbytes": 147456,
311
+ "byteOffset": 6048768
312
+ },
313
+ {
314
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
315
+ "shape": [
316
+ 6144,
317
+ 96
318
+ ],
319
+ "dtype": "uint32",
320
+ "format": "f32-to-bf16",
321
+ "nbytes": 2359296,
322
+ "byteOffset": 6196224
323
+ },
324
+ {
325
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
326
+ "shape": [
327
+ 6144,
328
+ 24
329
+ ],
330
+ "dtype": "float32",
331
+ "format": "f32-to-bf16",
332
+ "nbytes": 294912,
333
+ "byteOffset": 8555520
334
+ },
335
+ {
336
+ "name": "model.layers.2.post_attention_layernorm.weight",
337
+ "shape": [
338
+ 768
339
+ ],
340
+ "dtype": "float32",
341
+ "format": "f32-to-bf16",
342
+ "nbytes": 1536,
343
+ "byteOffset": 8850432
344
+ },
345
+ {
346
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
347
+ "shape": [
348
+ 1280,
349
+ 96
350
+ ],
351
+ "dtype": "uint32",
352
+ "format": "f32-to-bf16",
353
+ "nbytes": 491520,
354
+ "byteOffset": 8851968
355
+ },
356
+ {
357
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
358
+ "shape": [
359
+ 1280,
360
+ 24
361
+ ],
362
+ "dtype": "float32",
363
+ "format": "f32-to-bf16",
364
+ "nbytes": 61440,
365
+ "byteOffset": 9343488
366
+ },
367
+ {
368
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
369
+ "shape": [
370
+ 768,
371
+ 96
372
+ ],
373
+ "dtype": "uint32",
374
+ "format": "f32-to-bf16",
375
+ "nbytes": 294912,
376
+ "byteOffset": 9404928
377
+ },
378
+ {
379
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
380
+ "shape": [
381
+ 768,
382
+ 24
383
+ ],
384
+ "dtype": "float32",
385
+ "format": "f32-to-bf16",
386
+ "nbytes": 36864,
387
+ "byteOffset": 9699840
388
+ },
389
+ {
390
+ "name": "model.layers.3.input_layernorm.weight",
391
+ "shape": [
392
+ 768
393
+ ],
394
+ "dtype": "float32",
395
+ "format": "f32-to-bf16",
396
+ "nbytes": 1536,
397
+ "byteOffset": 9736704
398
+ },
399
+ {
400
+ "name": "model.layers.3.mlp.down_proj.q_weight",
401
+ "shape": [
402
+ 768,
403
+ 384
404
+ ],
405
+ "dtype": "uint32",
406
+ "format": "f32-to-bf16",
407
+ "nbytes": 1179648,
408
+ "byteOffset": 9738240
409
+ },
410
+ {
411
+ "name": "model.layers.3.mlp.down_proj.q_scale",
412
+ "shape": [
413
+ 768,
414
+ 96
415
+ ],
416
+ "dtype": "float32",
417
+ "format": "f32-to-bf16",
418
+ "nbytes": 147456,
419
+ "byteOffset": 10917888
420
+ },
421
+ {
422
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
423
+ "shape": [
424
+ 6144,
425
+ 96
426
+ ],
427
+ "dtype": "uint32",
428
+ "format": "f32-to-bf16",
429
+ "nbytes": 2359296,
430
+ "byteOffset": 11065344
431
+ },
432
+ {
433
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
434
+ "shape": [
435
+ 6144,
436
+ 24
437
+ ],
438
+ "dtype": "float32",
439
+ "format": "f32-to-bf16",
440
+ "nbytes": 294912,
441
+ "byteOffset": 13424640
442
+ },
443
+ {
444
+ "name": "model.layers.3.post_attention_layernorm.weight",
445
+ "shape": [
446
+ 768
447
+ ],
448
+ "dtype": "float32",
449
+ "format": "f32-to-bf16",
450
+ "nbytes": 1536,
451
+ "byteOffset": 13719552
452
+ },
453
+ {
454
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
455
+ "shape": [
456
+ 1280,
457
+ 96
458
+ ],
459
+ "dtype": "uint32",
460
+ "format": "f32-to-bf16",
461
+ "nbytes": 491520,
462
+ "byteOffset": 13721088
463
+ },
464
+ {
465
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
466
+ "shape": [
467
+ 1280,
468
+ 24
469
+ ],
470
+ "dtype": "float32",
471
+ "format": "f32-to-bf16",
472
+ "nbytes": 61440,
473
+ "byteOffset": 14212608
474
+ },
475
+ {
476
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
477
+ "shape": [
478
+ 768,
479
+ 96
480
+ ],
481
+ "dtype": "uint32",
482
+ "format": "f32-to-bf16",
483
+ "nbytes": 294912,
484
+ "byteOffset": 14274048
485
+ },
486
+ {
487
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
488
+ "shape": [
489
+ 768,
490
+ 24
491
+ ],
492
+ "dtype": "float32",
493
+ "format": "f32-to-bf16",
494
+ "nbytes": 36864,
495
+ "byteOffset": 14568960
496
+ },
497
+ {
498
+ "name": "model.layers.4.input_layernorm.weight",
499
+ "shape": [
500
+ 768
501
+ ],
502
+ "dtype": "float32",
503
+ "format": "f32-to-bf16",
504
+ "nbytes": 1536,
505
+ "byteOffset": 14605824
506
+ },
507
+ {
508
+ "name": "model.layers.4.mlp.down_proj.q_weight",
509
+ "shape": [
510
+ 768,
511
+ 384
512
+ ],
513
+ "dtype": "uint32",
514
+ "format": "f32-to-bf16",
515
+ "nbytes": 1179648,
516
+ "byteOffset": 14607360
517
+ },
518
+ {
519
+ "name": "model.layers.4.mlp.down_proj.q_scale",
520
+ "shape": [
521
+ 768,
522
+ 96
523
+ ],
524
+ "dtype": "float32",
525
+ "format": "f32-to-bf16",
526
+ "nbytes": 147456,
527
+ "byteOffset": 15787008
528
+ },
529
+ {
530
+ "name": "model.layers.4.mlp.gate_up_proj.q_weight",
531
+ "shape": [
532
+ 6144,
533
+ 96
534
+ ],
535
+ "dtype": "uint32",
536
+ "format": "f32-to-bf16",
537
+ "nbytes": 2359296,
538
+ "byteOffset": 15934464
539
+ },
540
+ {
541
+ "name": "model.layers.4.mlp.gate_up_proj.q_scale",
542
+ "shape": [
543
+ 6144,
544
+ 24
545
+ ],
546
+ "dtype": "float32",
547
+ "format": "f32-to-bf16",
548
+ "nbytes": 294912,
549
+ "byteOffset": 18293760
550
+ },
551
+ {
552
+ "name": "model.layers.4.post_attention_layernorm.weight",
553
+ "shape": [
554
+ 768
555
+ ],
556
+ "dtype": "float32",
557
+ "format": "f32-to-bf16",
558
+ "nbytes": 1536,
559
+ "byteOffset": 18588672
560
+ },
561
+ {
562
+ "name": "model.layers.4.self_attn.qkv_proj.q_weight",
563
+ "shape": [
564
+ 1280,
565
+ 96
566
+ ],
567
+ "dtype": "uint32",
568
+ "format": "f32-to-bf16",
569
+ "nbytes": 491520,
570
+ "byteOffset": 18590208
571
+ },
572
+ {
573
+ "name": "model.layers.4.self_attn.qkv_proj.q_scale",
574
+ "shape": [
575
+ 1280,
576
+ 24
577
+ ],
578
+ "dtype": "float32",
579
+ "format": "f32-to-bf16",
580
+ "nbytes": 61440,
581
+ "byteOffset": 19081728
582
+ },
583
+ {
584
+ "name": "model.layers.4.self_attn.o_proj.q_weight",
585
+ "shape": [
586
+ 768,
587
+ 96
588
+ ],
589
+ "dtype": "uint32",
590
+ "format": "f32-to-bf16",
591
+ "nbytes": 294912,
592
+ "byteOffset": 19143168
593
+ },
594
+ {
595
+ "name": "model.layers.4.self_attn.o_proj.q_scale",
596
+ "shape": [
597
+ 768,
598
+ 24
599
+ ],
600
+ "dtype": "float32",
601
+ "format": "f32-to-bf16",
602
+ "nbytes": 36864,
603
+ "byteOffset": 19438080
604
+ },
605
+ {
606
+ "name": "model.layers.5.input_layernorm.weight",
607
+ "shape": [
608
+ 768
609
+ ],
610
+ "dtype": "float32",
611
+ "format": "f32-to-bf16",
612
+ "nbytes": 1536,
613
+ "byteOffset": 19474944
614
+ },
615
+ {
616
+ "name": "model.layers.5.mlp.down_proj.q_weight",
617
+ "shape": [
618
+ 768,
619
+ 384
620
+ ],
621
+ "dtype": "uint32",
622
+ "format": "f32-to-bf16",
623
+ "nbytes": 1179648,
624
+ "byteOffset": 19476480
625
+ },
626
+ {
627
+ "name": "model.layers.5.mlp.down_proj.q_scale",
628
+ "shape": [
629
+ 768,
630
+ 96
631
+ ],
632
+ "dtype": "float32",
633
+ "format": "f32-to-bf16",
634
+ "nbytes": 147456,
635
+ "byteOffset": 20656128
636
+ },
637
+ {
638
+ "name": "model.layers.5.mlp.gate_up_proj.q_weight",
639
+ "shape": [
640
+ 6144,
641
+ 96
642
+ ],
643
+ "dtype": "uint32",
644
+ "format": "f32-to-bf16",
645
+ "nbytes": 2359296,
646
+ "byteOffset": 20803584
647
+ },
648
+ {
649
+ "name": "model.layers.5.mlp.gate_up_proj.q_scale",
650
+ "shape": [
651
+ 6144,
652
+ 24
653
+ ],
654
+ "dtype": "float32",
655
+ "format": "f32-to-bf16",
656
+ "nbytes": 294912,
657
+ "byteOffset": 23162880
658
+ },
659
+ {
660
+ "name": "model.layers.5.post_attention_layernorm.weight",
661
+ "shape": [
662
+ 768
663
+ ],
664
+ "dtype": "float32",
665
+ "format": "f32-to-bf16",
666
+ "nbytes": 1536,
667
+ "byteOffset": 23457792
668
+ },
669
+ {
670
+ "name": "model.layers.5.self_attn.qkv_proj.q_weight",
671
+ "shape": [
672
+ 1280,
673
+ 96
674
+ ],
675
+ "dtype": "uint32",
676
+ "format": "f32-to-bf16",
677
+ "nbytes": 491520,
678
+ "byteOffset": 23459328
679
+ },
680
+ {
681
+ "name": "model.layers.5.self_attn.qkv_proj.q_scale",
682
+ "shape": [
683
+ 1280,
684
+ 24
685
+ ],
686
+ "dtype": "float32",
687
+ "format": "f32-to-bf16",
688
+ "nbytes": 61440,
689
+ "byteOffset": 23950848
690
+ },
691
+ {
692
+ "name": "model.layers.5.self_attn.o_proj.q_weight",
693
+ "shape": [
694
+ 768,
695
+ 96
696
+ ],
697
+ "dtype": "uint32",
698
+ "format": "f32-to-bf16",
699
+ "nbytes": 294912,
700
+ "byteOffset": 24012288
701
+ },
702
+ {
703
+ "name": "model.layers.5.self_attn.o_proj.q_scale",
704
+ "shape": [
705
+ 768,
706
+ 24
707
+ ],
708
+ "dtype": "float32",
709
+ "format": "f32-to-bf16",
710
+ "nbytes": 36864,
711
+ "byteOffset": 24307200
712
+ },
713
+ {
714
+ "name": "model.norm.weight",
715
+ "shape": [
716
+ 768
717
+ ],
718
+ "dtype": "float32",
719
+ "format": "f32-to-bf16",
720
+ "nbytes": 1536,
721
+ "byteOffset": 24344064
722
+ }
723
+ ],
724
+ "md5sum": "0de23251ab4684bf693a425bc20ab403"
725
+ }
726
+ ]
727
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ed91d16b43d868d7aececa8546c4e61c48f86cafccf25e1485bf613ecaae9c6
3
+ size 32629248
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e839ef82baf0e29167357591d145ae249f9742d4034d7dbcec93dc2cbc3a26e
3
+ size 24345600
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "bos_token": "<s>",
29
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
30
+ "clean_up_tokenization_spaces": false,
31
+ "eos_token": "</s>",
32
+ "legacy": false,
33
+ "model_max_length": 1000000000000000019884624838656,
34
+ "pad_token": "</s>",
35
+ "padding_side": "right",
36
+ "sp_model_kwargs": {},
37
+ "spaces_between_special_tokens": false,
38
+ "tokenizer_class": "LlamaTokenizer",
39
+ "unk_token": "<unk>",
40
+ "use_default_system_prompt": false
41
+ }