cl-modelcloud
commited on
Commit
•
b0f05c6
1
Parent(s):
32ed296
1404a834ae25c5958a5475b83ab98ae33d15cfd3ef34c31d26e99759d7c94986
Browse files- config.json +49 -0
- model.safetensors +3 -0
- quant_log.json +1 -0
- quantize_config.json +19 -0
- special_tokens_map.json +30 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +43 -0
config.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/monster/data/model/TinyLlama-1.1B-Chat-v1.0",
|
3 |
+
"architectures": [
|
4 |
+
"LlamaForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 1,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"head_dim": 64,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 2048,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 5632,
|
15 |
+
"max_position_embeddings": 2048,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"num_attention_heads": 32,
|
19 |
+
"num_hidden_layers": 22,
|
20 |
+
"num_key_value_heads": 4,
|
21 |
+
"pretraining_tp": 1,
|
22 |
+
"quantization_config": {
|
23 |
+
"bits": 4,
|
24 |
+
"checkpoint_format": "gptq",
|
25 |
+
"damp_auto_increment": 0.0015,
|
26 |
+
"damp_percent": 0.005,
|
27 |
+
"desc_act": false,
|
28 |
+
"dynamic": null,
|
29 |
+
"group_size": 128,
|
30 |
+
"lm_head": false,
|
31 |
+
"meta": {
|
32 |
+
"quantizer": "gptqmodel:1.0.10-dev"
|
33 |
+
},
|
34 |
+
"model_file_base_name": null,
|
35 |
+
"model_name_or_path": null,
|
36 |
+
"quant_method": "gptq",
|
37 |
+
"static_groups": false,
|
38 |
+
"sym": true,
|
39 |
+
"true_sequential": true
|
40 |
+
},
|
41 |
+
"rms_norm_eps": 1e-05,
|
42 |
+
"rope_scaling": null,
|
43 |
+
"rope_theta": 10000.0,
|
44 |
+
"tie_word_embeddings": false,
|
45 |
+
"torch_dtype": "bfloat16",
|
46 |
+
"transformers_version": "4.45.2",
|
47 |
+
"use_cache": true,
|
48 |
+
"vocab_size": 32000
|
49 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:882f363cd8bc71958e4225e56286c7381e6621c0bcb361f22ca78e2a26618ebf
|
3 |
+
size 767343824
|
quant_log.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"layer": 0, "module": "self_attn.k_proj", "loss": "0.11426", "damp": "0.00500", "layer_forward_time": "1.138", "w_clone_time": "0.001", "quant_time": "4.205"}, {"layer": 0, "module": "self_attn.v_proj", "loss": "0.00036", "damp": "0.00500", "layer_forward_time": "1.138", "w_clone_time": "0.001", "quant_time": "0.480"}, {"layer": 0, "module": "self_attn.q_proj", "loss": "0.12988", "damp": "0.00500", "layer_forward_time": "1.138", "w_clone_time": "0.006", "quant_time": "0.923"}, {"layer": 0, "module": "self_attn.o_proj", "loss": "0.00002", "damp": "0.00500", "layer_forward_time": "1.320", "w_clone_time": "0.008", "quant_time": "0.306"}, {"layer": 0, "module": "mlp.up_proj", "loss": "0.04541", "damp": "0.00500", "layer_forward_time": "1.335", "w_clone_time": "0.019", "quant_time": "0.407"}, {"layer": 0, "module": "mlp.gate_proj", "loss": "0.05127", "damp": "0.00500", "layer_forward_time": "1.335", "w_clone_time": "0.017", "quant_time": "0.297"}, {"layer": 0, "module": "mlp.down_proj", "loss": "0.00014", "damp": "0.00500", "layer_forward_time": "2.098", "w_clone_time": "0.017", "quant_time": "0.895"}, {"layer": 1, "module": "self_attn.k_proj", "loss": "0.24707", "damp": "0.00500", "layer_forward_time": "1.357", "w_clone_time": "0.006", "quant_time": "0.320"}, {"layer": 1, "module": "self_attn.v_proj", "loss": "0.00188", "damp": "0.00500", "layer_forward_time": "1.357", "w_clone_time": "0.005", "quant_time": "0.555"}, {"layer": 1, "module": "self_attn.q_proj", "loss": "0.34375", "damp": "0.00500", "layer_forward_time": "1.357", "w_clone_time": "0.008", "quant_time": "0.293"}, {"layer": 1, "module": "self_attn.o_proj", "loss": "0.00022", "damp": "0.00500", "layer_forward_time": "0.806", "w_clone_time": "0.006", "quant_time": "0.292"}, {"layer": 1, "module": "mlp.up_proj", "loss": "0.11523", "damp": "0.00500", "layer_forward_time": "0.982", "w_clone_time": "0.020", "quant_time": "0.293"}, {"layer": 1, "module": "mlp.gate_proj", "loss": "0.12891", "damp": "0.00500", "layer_forward_time": "0.982", "w_clone_time": "0.016", "quant_time": "0.295"}, {"layer": 1, "module": "mlp.down_proj", "loss": "0.00048", "damp": "0.00500", "layer_forward_time": "1.846", "w_clone_time": "0.020", "quant_time": "1.021"}, {"layer": 2, "module": "self_attn.k_proj", "loss": "0.16895", "damp": "0.00500", "layer_forward_time": "0.979", "w_clone_time": "0.008", "quant_time": "0.290"}, {"layer": 2, "module": "self_attn.v_proj", "loss": "0.00383", "damp": "0.00500", "layer_forward_time": "0.979", "w_clone_time": "0.001", "quant_time": "0.289"}, {"layer": 2, "module": "self_attn.q_proj", "loss": "0.31445", "damp": "0.00500", "layer_forward_time": "0.979", "w_clone_time": "0.006", "quant_time": "0.292"}, {"layer": 2, "module": "self_attn.o_proj", "loss": "0.00035", "damp": "0.00500", "layer_forward_time": "0.802", "w_clone_time": "0.009", "quant_time": "0.305"}, {"layer": 2, "module": "mlp.up_proj", "loss": "0.21582", "damp": "0.00500", "layer_forward_time": "0.935", "w_clone_time": "0.017", "quant_time": "0.294"}, {"layer": 2, "module": "mlp.gate_proj", "loss": "0.24512", "damp": "0.00500", "layer_forward_time": "0.935", "w_clone_time": "0.017", "quant_time": "0.291"}, {"layer": 2, "module": "mlp.down_proj", "loss": "0.00861", "damp": "0.00500", "layer_forward_time": "1.984", "w_clone_time": "0.019", "quant_time": "0.845"}, {"layer": 3, "module": "self_attn.k_proj", "loss": "0.28516", "damp": "0.00500", "layer_forward_time": "1.040", "w_clone_time": "0.002", "quant_time": "0.312"}, {"layer": 3, "module": "self_attn.v_proj", "loss": "0.01343", "damp": "0.00500", "layer_forward_time": "1.040", "w_clone_time": "0.012", "quant_time": "0.315"}, {"layer": 3, "module": "self_attn.q_proj", "loss": "0.68359", "damp": "0.00500", "layer_forward_time": "1.040", "w_clone_time": "0.010", "quant_time": "0.296"}, {"layer": 3, "module": "self_attn.o_proj", "loss": "0.00032", "damp": "0.00500", "layer_forward_time": "0.813", "w_clone_time": "0.010", "quant_time": "0.297"}, {"layer": 3, "module": "mlp.up_proj", "loss": "0.29688", "damp": "0.00500", "layer_forward_time": "0.912", "w_clone_time": "0.017", "quant_time": "0.295"}, {"layer": 3, "module": "mlp.gate_proj", "loss": "0.34180", "damp": "0.00500", "layer_forward_time": "0.912", "w_clone_time": "0.019", "quant_time": "0.294"}, {"layer": 3, "module": "mlp.down_proj", "loss": "0.00179", "damp": "0.00500", "layer_forward_time": "1.683", "w_clone_time": "0.021", "quant_time": "0.811"}, {"layer": 4, "module": "self_attn.k_proj", "loss": "0.66406", "damp": "0.00500", "layer_forward_time": "1.199", "w_clone_time": "0.001", "quant_time": "0.300"}, {"layer": 4, "module": "self_attn.v_proj", "loss": "0.02271", "damp": "0.00500", "layer_forward_time": "1.199", "w_clone_time": "0.001", "quant_time": "0.290"}, {"layer": 4, "module": "self_attn.q_proj", "loss": "1.33594", "damp": "0.00500", "layer_forward_time": "1.199", "w_clone_time": "0.006", "quant_time": "0.292"}, {"layer": 4, "module": "self_attn.o_proj", "loss": "0.00072", "damp": "0.00500", "layer_forward_time": "0.978", "w_clone_time": "0.006", "quant_time": "0.289"}, {"layer": 4, "module": "mlp.up_proj", "loss": "0.39062", "damp": "0.00500", "layer_forward_time": "1.041", "w_clone_time": "0.017", "quant_time": "0.294"}, {"layer": 4, "module": "mlp.gate_proj", "loss": "0.46289", "damp": "0.00500", "layer_forward_time": "1.041", "w_clone_time": "0.016", "quant_time": "0.328"}, {"layer": 4, "module": "mlp.down_proj", "loss": "0.00275", "damp": "0.00500", "layer_forward_time": "1.793", "w_clone_time": "0.017", "quant_time": "0.830"}, {"layer": 5, "module": "self_attn.k_proj", "loss": "0.45508", "damp": "0.00500", "layer_forward_time": "1.343", "w_clone_time": "0.003", "quant_time": "0.294"}, {"layer": 5, "module": "self_attn.v_proj", "loss": "0.02100", "damp": "0.00500", "layer_forward_time": "1.343", "w_clone_time": "0.009", "quant_time": "0.292"}, {"layer": 5, "module": "self_attn.q_proj", "loss": "0.98828", "damp": "0.00500", "layer_forward_time": "1.343", "w_clone_time": "0.012", "quant_time": "0.297"}, {"layer": 5, "module": "self_attn.o_proj", "loss": "0.00083", "damp": "0.00500", "layer_forward_time": "1.170", "w_clone_time": "0.014", "quant_time": "0.312"}, {"layer": 5, "module": "mlp.up_proj", "loss": "0.48828", "damp": "0.00500", "layer_forward_time": "1.203", "w_clone_time": "0.017", "quant_time": "0.294"}, {"layer": 5, "module": "mlp.gate_proj", "loss": "0.59375", "damp": "0.00500", "layer_forward_time": "1.203", "w_clone_time": "0.020", "quant_time": "0.289"}, {"layer": 5, "module": "mlp.down_proj", "loss": "0.00415", "damp": "0.00500", "layer_forward_time": "1.966", "w_clone_time": "0.017", "quant_time": "0.800"}, {"layer": 6, "module": "self_attn.k_proj", "loss": "0.50391", "damp": "0.00500", "layer_forward_time": "0.968", "w_clone_time": "0.002", "quant_time": "0.290"}, {"layer": 6, "module": "self_attn.v_proj", "loss": "0.02466", "damp": "0.00500", "layer_forward_time": "0.968", "w_clone_time": "0.002", "quant_time": "0.288"}, {"layer": 6, "module": "self_attn.q_proj", "loss": "1.17188", "damp": "0.00500", "layer_forward_time": "0.968", "w_clone_time": "0.006", "quant_time": "0.291"}, {"layer": 6, "module": "self_attn.o_proj", "loss": "0.00123", "damp": "0.00500", "layer_forward_time": "0.792", "w_clone_time": "0.006", "quant_time": "0.293"}, {"layer": 6, "module": "mlp.up_proj", "loss": "0.55469", "damp": "0.00500", "layer_forward_time": "0.936", "w_clone_time": "0.018", "quant_time": "0.295"}, {"layer": 6, "module": "mlp.gate_proj", "loss": "0.71484", "damp": "0.00500", "layer_forward_time": "0.936", "w_clone_time": "0.017", "quant_time": "0.292"}, {"layer": 6, "module": "mlp.down_proj", "loss": "0.00589", "damp": "0.00500", "layer_forward_time": "1.882", "w_clone_time": "0.017", "quant_time": "0.811"}, {"layer": 7, "module": "self_attn.k_proj", "loss": "0.50391", "damp": "0.00500", "layer_forward_time": "1.293", "w_clone_time": "0.006", "quant_time": "0.319"}, {"layer": 7, "module": "self_attn.v_proj", "loss": "0.03735", "damp": "0.00500", "layer_forward_time": "1.293", "w_clone_time": "0.004", "quant_time": "0.291"}, {"layer": 7, "module": "self_attn.q_proj", "loss": "1.42969", "damp": "0.00500", "layer_forward_time": "1.293", "w_clone_time": "0.011", "quant_time": "0.297"}, {"layer": 7, "module": "self_attn.o_proj", "loss": "0.00241", "damp": "0.00500", "layer_forward_time": "1.236", "w_clone_time": "0.011", "quant_time": "0.316"}, {"layer": 7, "module": "mlp.up_proj", "loss": "0.64453", "damp": "0.00500", "layer_forward_time": "1.252", "w_clone_time": "0.057", "quant_time": "0.345"}, {"layer": 7, "module": "mlp.gate_proj", "loss": "0.98438", "damp": "0.00500", "layer_forward_time": "1.252", "w_clone_time": "0.035", "quant_time": "0.339"}, {"layer": 7, "module": "mlp.down_proj", "loss": "0.04834", "damp": "0.00500", "layer_forward_time": "1.976", "w_clone_time": "0.020", "quant_time": "1.075"}, {"layer": 8, "module": "self_attn.k_proj", "loss": "1.05469", "damp": "0.00500", "layer_forward_time": "1.001", "w_clone_time": "0.003", "quant_time": "0.296"}, {"layer": 8, "module": "self_attn.v_proj", "loss": "0.05347", "damp": "0.00500", "layer_forward_time": "1.001", "w_clone_time": "0.001", "quant_time": "0.293"}, {"layer": 8, "module": "self_attn.q_proj", "loss": "2.45312", "damp": "0.00500", "layer_forward_time": "1.001", "w_clone_time": "0.008", "quant_time": "0.300"}, {"layer": 8, "module": "self_attn.o_proj", "loss": "0.00193", "damp": "0.00500", "layer_forward_time": "0.822", "w_clone_time": "0.009", "quant_time": "0.294"}, {"layer": 8, "module": "mlp.up_proj", "loss": "0.78125", "damp": "0.00500", "layer_forward_time": "1.106", "w_clone_time": "0.019", "quant_time": "0.341"}, {"layer": 8, "module": "mlp.gate_proj", "loss": "1.07031", "damp": "0.00500", "layer_forward_time": "1.106", "w_clone_time": "0.016", "quant_time": "0.295"}, {"layer": 8, "module": "mlp.down_proj", "loss": "0.01111", "damp": "0.00500", "layer_forward_time": "1.730", "w_clone_time": "0.017", "quant_time": "0.827"}, {"layer": 9, "module": "self_attn.k_proj", "loss": "0.56250", "damp": "0.00500", "layer_forward_time": "1.176", "w_clone_time": "0.003", "quant_time": "0.292"}, {"layer": 9, "module": "self_attn.v_proj", "loss": "0.03394", "damp": "0.00500", "layer_forward_time": "1.176", "w_clone_time": "0.003", "quant_time": "0.292"}, {"layer": 9, "module": "self_attn.q_proj", "loss": "1.37500", "damp": "0.00500", "layer_forward_time": "1.176", "w_clone_time": "0.010", "quant_time": "0.298"}, {"layer": 9, "module": "self_attn.o_proj", "loss": "0.00430", "damp": "0.00500", "layer_forward_time": "1.077", "w_clone_time": "0.008", "quant_time": "0.296"}, {"layer": 9, "module": "mlp.up_proj", "loss": "0.85156", "damp": "0.00500", "layer_forward_time": "1.106", "w_clone_time": "0.017", "quant_time": "0.298"}, {"layer": 9, "module": "mlp.gate_proj", "loss": "1.24219", "damp": "0.00500", "layer_forward_time": "1.106", "w_clone_time": "0.017", "quant_time": "0.296"}, {"layer": 9, "module": "mlp.down_proj", "loss": "0.01477", "damp": "0.00500", "layer_forward_time": "1.740", "w_clone_time": "0.043", "quant_time": "0.863"}, {"layer": 10, "module": "self_attn.k_proj", "loss": "0.64453", "damp": "0.00500", "layer_forward_time": "0.845", "w_clone_time": "0.005", "quant_time": "0.292"}, {"layer": 10, "module": "self_attn.v_proj", "loss": "0.03760", "damp": "0.00500", "layer_forward_time": "0.845", "w_clone_time": "0.007", "quant_time": "0.297"}, {"layer": 10, "module": "self_attn.q_proj", "loss": "1.51562", "damp": "0.00500", "layer_forward_time": "0.845", "w_clone_time": "0.010", "quant_time": "0.290"}, {"layer": 10, "module": "self_attn.o_proj", "loss": "0.00494", "damp": "0.00500", "layer_forward_time": "0.597", "w_clone_time": "0.015", "quant_time": "0.365"}, {"layer": 10, "module": "mlp.up_proj", "loss": "0.93359", "damp": "0.00500", "layer_forward_time": "0.914", "w_clone_time": "0.020", "quant_time": "0.299"}, {"layer": 10, "module": "mlp.gate_proj", "loss": "1.28906", "damp": "0.00500", "layer_forward_time": "0.914", "w_clone_time": "0.016", "quant_time": "0.296"}, {"layer": 10, "module": "mlp.down_proj", "loss": "0.01672", "damp": "0.00500", "layer_forward_time": "1.816", "w_clone_time": "0.017", "quant_time": "0.830"}, {"layer": 11, "module": "self_attn.k_proj", "loss": "0.77344", "damp": "0.00500", "layer_forward_time": "1.321", "w_clone_time": "0.001", "quant_time": "0.352"}, {"layer": 11, "module": "self_attn.v_proj", "loss": "0.04980", "damp": "0.00500", "layer_forward_time": "1.321", "w_clone_time": "0.001", "quant_time": "0.293"}, {"layer": 11, "module": "self_attn.q_proj", "loss": "2.12500", "damp": "0.00500", "layer_forward_time": "1.321", "w_clone_time": "0.006", "quant_time": "0.294"}, {"layer": 11, "module": "self_attn.o_proj", "loss": "0.00742", "damp": "0.00500", "layer_forward_time": "1.191", "w_clone_time": "0.007", "quant_time": "0.297"}, {"layer": 11, "module": "mlp.up_proj", "loss": "1.05469", "damp": "0.00500", "layer_forward_time": "1.246", "w_clone_time": "0.017", "quant_time": "0.292"}, {"layer": 11, "module": "mlp.gate_proj", "loss": "1.48438", "damp": "0.00500", "layer_forward_time": "1.246", "w_clone_time": "0.016", "quant_time": "0.309"}, {"layer": 11, "module": "mlp.down_proj", "loss": "0.02063", "damp": "0.00500", "layer_forward_time": "1.931", "w_clone_time": "0.027", "quant_time": "0.867"}, {"layer": 12, "module": "self_attn.k_proj", "loss": "0.67188", "damp": "0.00500", "layer_forward_time": "1.168", "w_clone_time": "0.004", "quant_time": "0.293"}, {"layer": 12, "module": "self_attn.v_proj", "loss": "0.05835", "damp": "0.00500", "layer_forward_time": "1.168", "w_clone_time": "0.005", "quant_time": "0.286"}, {"layer": 12, "module": "self_attn.q_proj", "loss": "1.60156", "damp": "0.00500", "layer_forward_time": "1.168", "w_clone_time": "0.008", "quant_time": "0.288"}, {"layer": 12, "module": "self_attn.o_proj", "loss": "0.00964", "damp": "0.00500", "layer_forward_time": "0.972", "w_clone_time": "0.006", "quant_time": "0.291"}, {"layer": 12, "module": "mlp.up_proj", "loss": "1.17969", "damp": "0.00500", "layer_forward_time": "1.045", "w_clone_time": "0.018", "quant_time": "0.307"}, {"layer": 12, "module": "mlp.gate_proj", "loss": "1.81250", "damp": "0.00500", "layer_forward_time": "1.045", "w_clone_time": "0.018", "quant_time": "0.291"}, {"layer": 12, "module": "mlp.down_proj", "loss": "0.02832", "damp": "0.00500", "layer_forward_time": "1.779", "w_clone_time": "0.020", "quant_time": "0.805"}, {"layer": 13, "module": "self_attn.k_proj", "loss": "0.77344", "damp": "0.00500", "layer_forward_time": "1.129", "w_clone_time": "0.001", "quant_time": "0.286"}, {"layer": 13, "module": "self_attn.v_proj", "loss": "0.04980", "damp": "0.00500", "layer_forward_time": "1.129", "w_clone_time": "0.005", "quant_time": "0.297"}, {"layer": 13, "module": "self_attn.q_proj", "loss": "1.78906", "damp": "0.00500", "layer_forward_time": "1.129", "w_clone_time": "0.006", "quant_time": "0.292"}, {"layer": 13, "module": "self_attn.o_proj", "loss": "0.01227", "damp": "0.00500", "layer_forward_time": "0.919", "w_clone_time": "0.006", "quant_time": "0.290"}, {"layer": 13, "module": "mlp.up_proj", "loss": "1.32031", "damp": "0.00500", "layer_forward_time": "1.111", "w_clone_time": "0.019", "quant_time": "0.342"}, {"layer": 13, "module": "mlp.gate_proj", "loss": "1.97656", "damp": "0.00500", "layer_forward_time": "1.111", "w_clone_time": "0.017", "quant_time": "0.300"}, {"layer": 13, "module": "mlp.down_proj", "loss": "0.03735", "damp": "0.00500", "layer_forward_time": "1.759", "w_clone_time": "0.017", "quant_time": "0.818"}, {"layer": 14, "module": "self_attn.k_proj", "loss": "0.75000", "damp": "0.00500", "layer_forward_time": "1.041", "w_clone_time": "0.001", "quant_time": "0.290"}, {"layer": 14, "module": "self_attn.v_proj", "loss": "0.05640", "damp": "0.00500", "layer_forward_time": "1.041", "w_clone_time": "0.001", "quant_time": "0.294"}, {"layer": 14, "module": "self_attn.q_proj", "loss": "1.68750", "damp": "0.00500", "layer_forward_time": "1.041", "w_clone_time": "0.006", "quant_time": "0.291"}, {"layer": 14, "module": "self_attn.o_proj", "loss": "0.01917", "damp": "0.00500", "layer_forward_time": "0.863", "w_clone_time": "0.006", "quant_time": "0.295"}, {"layer": 14, "module": "mlp.up_proj", "loss": "1.48438", "damp": "0.00500", "layer_forward_time": "0.893", "w_clone_time": "0.017", "quant_time": "0.300"}, {"layer": 14, "module": "mlp.gate_proj", "loss": "2.12500", "damp": "0.00500", "layer_forward_time": "0.893", "w_clone_time": "0.018", "quant_time": "0.294"}, {"layer": 14, "module": "mlp.down_proj", "loss": "0.04810", "damp": "0.00500", "layer_forward_time": "1.451", "w_clone_time": "0.017", "quant_time": "0.819"}, {"layer": 15, "module": "self_attn.k_proj", "loss": "0.82422", "damp": "0.00500", "layer_forward_time": "1.060", "w_clone_time": "0.003", "quant_time": "0.293"}, {"layer": 15, "module": "self_attn.v_proj", "loss": "0.09668", "damp": "0.00500", "layer_forward_time": "1.060", "w_clone_time": "0.005", "quant_time": "0.300"}, {"layer": 15, "module": "self_attn.q_proj", "loss": "2.46875", "damp": "0.00500", "layer_forward_time": "1.060", "w_clone_time": "0.009", "quant_time": "0.302"}, {"layer": 15, "module": "self_attn.o_proj", "loss": "0.01709", "damp": "0.00500", "layer_forward_time": "0.923", "w_clone_time": "0.009", "quant_time": "0.289"}, {"layer": 15, "module": "mlp.up_proj", "loss": "1.70312", "damp": "0.00500", "layer_forward_time": "1.146", "w_clone_time": "0.017", "quant_time": "0.292"}, {"layer": 15, "module": "mlp.gate_proj", "loss": "2.35938", "damp": "0.00500", "layer_forward_time": "1.146", "w_clone_time": "0.016", "quant_time": "0.292"}, {"layer": 15, "module": "mlp.down_proj", "loss": "0.07666", "damp": "0.00500", "layer_forward_time": "2.029", "w_clone_time": "0.017", "quant_time": "0.814"}, {"layer": 16, "module": "self_attn.k_proj", "loss": "0.83594", "damp": "0.00500", "layer_forward_time": "0.995", "w_clone_time": "0.005", "quant_time": "0.296"}, {"layer": 16, "module": "self_attn.v_proj", "loss": "0.08936", "damp": "0.00500", "layer_forward_time": "0.995", "w_clone_time": "0.006", "quant_time": "0.287"}, {"layer": 16, "module": "self_attn.q_proj", "loss": "2.48438", "damp": "0.00500", "layer_forward_time": "0.995", "w_clone_time": "0.006", "quant_time": "0.288"}, {"layer": 16, "module": "self_attn.o_proj", "loss": "0.02112", "damp": "0.00500", "layer_forward_time": "0.811", "w_clone_time": "0.006", "quant_time": "0.288"}, {"layer": 16, "module": "mlp.up_proj", "loss": "2.04688", "damp": "0.00500", "layer_forward_time": "0.925", "w_clone_time": "0.017", "quant_time": "0.289"}, {"layer": 16, "module": "mlp.gate_proj", "loss": "3.00000", "damp": "0.00500", "layer_forward_time": "0.925", "w_clone_time": "0.017", "quant_time": "0.289"}, {"layer": 16, "module": "mlp.down_proj", "loss": "0.12354", "damp": "0.00500", "layer_forward_time": "1.947", "w_clone_time": "0.017", "quant_time": "0.818"}, {"layer": 17, "module": "self_attn.k_proj", "loss": "0.86328", "damp": "0.00500", "layer_forward_time": "1.347", "w_clone_time": "0.007", "quant_time": "0.288"}, {"layer": 17, "module": "self_attn.v_proj", "loss": "0.16895", "damp": "0.00500", "layer_forward_time": "1.347", "w_clone_time": "0.003", "quant_time": "0.289"}, {"layer": 17, "module": "self_attn.q_proj", "loss": "2.29688", "damp": "0.00500", "layer_forward_time": "1.347", "w_clone_time": "0.008", "quant_time": "0.289"}, {"layer": 17, "module": "self_attn.o_proj", "loss": "0.02808", "damp": "0.00500", "layer_forward_time": "1.066", "w_clone_time": "0.007", "quant_time": "0.288"}, {"layer": 17, "module": "mlp.up_proj", "loss": "2.43750", "damp": "0.00500", "layer_forward_time": "1.152", "w_clone_time": "0.017", "quant_time": "0.293"}, {"layer": 17, "module": "mlp.gate_proj", "loss": "3.45312", "damp": "0.00500", "layer_forward_time": "1.152", "w_clone_time": "0.018", "quant_time": "0.293"}, {"layer": 17, "module": "mlp.down_proj", "loss": "0.14453", "damp": "0.00500", "layer_forward_time": "1.864", "w_clone_time": "0.018", "quant_time": "0.821"}, {"layer": 18, "module": "self_attn.k_proj", "loss": "0.98828", "damp": "0.00500", "layer_forward_time": "1.161", "w_clone_time": "0.007", "quant_time": "0.285"}, {"layer": 18, "module": "self_attn.v_proj", "loss": "0.26758", "damp": "0.00500", "layer_forward_time": "1.161", "w_clone_time": "0.005", "quant_time": "0.290"}, {"layer": 18, "module": "self_attn.q_proj", "loss": "2.57812", "damp": "0.00500", "layer_forward_time": "1.161", "w_clone_time": "0.006", "quant_time": "0.296"}, {"layer": 18, "module": "self_attn.o_proj", "loss": "0.03882", "damp": "0.00500", "layer_forward_time": "0.992", "w_clone_time": "0.006", "quant_time": "0.287"}, {"layer": 18, "module": "mlp.up_proj", "loss": "3.00000", "damp": "0.00500", "layer_forward_time": "1.019", "w_clone_time": "0.023", "quant_time": "0.298"}, {"layer": 18, "module": "mlp.gate_proj", "loss": "4.15625", "damp": "0.00500", "layer_forward_time": "1.019", "w_clone_time": "0.019", "quant_time": "0.291"}, {"layer": 18, "module": "mlp.down_proj", "loss": "0.22949", "damp": "0.00500", "layer_forward_time": "1.973", "w_clone_time": "0.020", "quant_time": "0.825"}, {"layer": 19, "module": "self_attn.k_proj", "loss": "0.90234", "damp": "0.00500", "layer_forward_time": "1.290", "w_clone_time": "0.002", "quant_time": "0.327"}, {"layer": 19, "module": "self_attn.v_proj", "loss": "0.29883", "damp": "0.00500", "layer_forward_time": "1.290", "w_clone_time": "0.002", "quant_time": "0.290"}, {"layer": 19, "module": "self_attn.q_proj", "loss": "2.42188", "damp": "0.00500", "layer_forward_time": "1.290", "w_clone_time": "0.006", "quant_time": "0.291"}, {"layer": 19, "module": "self_attn.o_proj", "loss": "0.05127", "damp": "0.00500", "layer_forward_time": "1.205", "w_clone_time": "0.010", "quant_time": "0.286"}, {"layer": 19, "module": "mlp.up_proj", "loss": "3.67188", "damp": "0.00500", "layer_forward_time": "1.131", "w_clone_time": "0.020", "quant_time": "0.286"}, {"layer": 19, "module": "mlp.gate_proj", "loss": "4.90625", "damp": "0.00500", "layer_forward_time": "1.131", "w_clone_time": "0.017", "quant_time": "0.297"}, {"layer": 19, "module": "mlp.down_proj", "loss": "0.37305", "damp": "0.00500", "layer_forward_time": "1.896", "w_clone_time": "0.018", "quant_time": "0.811"}, {"layer": 20, "module": "self_attn.k_proj", "loss": "0.90625", "damp": "0.00500", "layer_forward_time": "1.261", "w_clone_time": "0.002", "quant_time": "0.346"}, {"layer": 20, "module": "self_attn.v_proj", "loss": "0.23438", "damp": "0.00500", "layer_forward_time": "1.261", "w_clone_time": "0.001", "quant_time": "0.292"}, {"layer": 20, "module": "self_attn.q_proj", "loss": "2.48438", "damp": "0.00500", "layer_forward_time": "1.261", "w_clone_time": "0.009", "quant_time": "0.290"}, {"layer": 20, "module": "self_attn.o_proj", "loss": "0.06836", "damp": "0.00500", "layer_forward_time": "0.969", "w_clone_time": "0.009", "quant_time": "0.295"}, {"layer": 20, "module": "mlp.up_proj", "loss": "4.34375", "damp": "0.00500", "layer_forward_time": "1.141", "w_clone_time": "0.020", "quant_time": "0.315"}, {"layer": 20, "module": "mlp.gate_proj", "loss": "5.62500", "damp": "0.00500", "layer_forward_time": "1.141", "w_clone_time": "0.019", "quant_time": "0.286"}, {"layer": 20, "module": "mlp.down_proj", "loss": "0.59375", "damp": "0.00500", "layer_forward_time": "1.670", "w_clone_time": "0.017", "quant_time": "0.804"}, {"layer": 21, "module": "self_attn.k_proj", "loss": "0.93359", "damp": "0.00500", "layer_forward_time": "0.901", "w_clone_time": "0.002", "quant_time": "0.285"}, {"layer": 21, "module": "self_attn.v_proj", "loss": "0.31250", "damp": "0.00500", "layer_forward_time": "0.901", "w_clone_time": "0.001", "quant_time": "0.293"}, {"layer": 21, "module": "self_attn.q_proj", "loss": "2.68750", "damp": "0.00500", "layer_forward_time": "0.901", "w_clone_time": "0.008", "quant_time": "0.293"}, {"layer": 21, "module": "self_attn.o_proj", "loss": "0.11377", "damp": "0.00500", "layer_forward_time": "0.770", "w_clone_time": "0.014", "quant_time": "0.303"}, {"layer": 21, "module": "mlp.up_proj", "loss": "4.65625", "damp": "0.00500", "layer_forward_time": "0.870", "w_clone_time": "0.023", "quant_time": "0.289"}, {"layer": 21, "module": "mlp.gate_proj", "loss": "7.34375", "damp": "0.00500", "layer_forward_time": "0.870", "w_clone_time": "0.019", "quant_time": "0.287"}, {"layer": 21, "module": "mlp.down_proj", "loss": "1.06250", "damp": "0.00500", "layer_forward_time": "1.509", "w_clone_time": "0.018", "quant_time": "0.808"}]
|
quantize_config.json
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bits": 4,
|
3 |
+
"dynamic": null,
|
4 |
+
"group_size": 128,
|
5 |
+
"desc_act": false,
|
6 |
+
"static_groups": false,
|
7 |
+
"sym": true,
|
8 |
+
"lm_head": false,
|
9 |
+
"damp_percent": 0.005,
|
10 |
+
"damp_auto_increment": 0.0015,
|
11 |
+
"true_sequential": true,
|
12 |
+
"model_name_or_path": "/monster/data/model/TinyLlama-1.1B-Chat-v1.0/gptq_4bits_10-24_10-47-05_maxlen2048_ns1024_descFalse_damp0.005",
|
13 |
+
"model_file_base_name": "model",
|
14 |
+
"quant_method": "gptq",
|
15 |
+
"checkpoint_format": "gptq",
|
16 |
+
"meta": {
|
17 |
+
"quantizer": "gptqmodel:1.0.10-dev"
|
18 |
+
}
|
19 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "</s>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"unk_token": {
|
24 |
+
"content": "<unk>",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
}
|
30 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
3 |
+
size 499723
|
tokenizer_config.json
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"add_prefix_space": null,
|
5 |
+
"added_tokens_decoder": {
|
6 |
+
"0": {
|
7 |
+
"content": "<unk>",
|
8 |
+
"lstrip": false,
|
9 |
+
"normalized": false,
|
10 |
+
"rstrip": false,
|
11 |
+
"single_word": false,
|
12 |
+
"special": true
|
13 |
+
},
|
14 |
+
"1": {
|
15 |
+
"content": "<s>",
|
16 |
+
"lstrip": false,
|
17 |
+
"normalized": false,
|
18 |
+
"rstrip": false,
|
19 |
+
"single_word": false,
|
20 |
+
"special": true
|
21 |
+
},
|
22 |
+
"2": {
|
23 |
+
"content": "</s>",
|
24 |
+
"lstrip": false,
|
25 |
+
"normalized": false,
|
26 |
+
"rstrip": false,
|
27 |
+
"single_word": false,
|
28 |
+
"special": true
|
29 |
+
}
|
30 |
+
},
|
31 |
+
"bos_token": "<s>",
|
32 |
+
"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
|
33 |
+
"clean_up_tokenization_spaces": false,
|
34 |
+
"eos_token": "</s>",
|
35 |
+
"legacy": false,
|
36 |
+
"model_max_length": 2048,
|
37 |
+
"pad_token": "</s>",
|
38 |
+
"padding_side": "right",
|
39 |
+
"sp_model_kwargs": {},
|
40 |
+
"tokenizer_class": "LlamaTokenizer",
|
41 |
+
"unk_token": "<unk>",
|
42 |
+
"use_default_system_prompt": false
|
43 |
+
}
|