wenhuach committed on
Commit
07a117c
1 Parent(s): 327644c

replace with autogptq format

Browse files

Signed-off-by: wenhuach <wenhuach87@gmail.com>

Files changed (3) hide show
  1. config.json +12 -9
  2. generation_config.json +1 -1
  3. model.safetensors +2 -2
config.json CHANGED
@@ -18,12 +18,13 @@
18
  "num_key_value_heads": 4,
19
  "quantization_config": {
20
  "amp": true,
21
- "autoround_version": "0.2.1.dev",
22
- "backend": "autoround:exllamav2",
23
  "bits": 4,
 
24
  "data_type": "int",
25
- "dataset": "NeelNanda/pile-10k",
26
  "enable_minmax_tuning": true,
 
27
  "enable_quanted_input": true,
28
  "gradient_accumulate_steps": 1,
29
  "group_size": 128,
@@ -31,19 +32,21 @@
31
  "low_gpu_mem_usage": false,
32
  "lr": 0.001,
33
  "minmax_lr": 0.001,
34
- "n_samples": 512,
35
- "quant_method": "intel/auto-round",
 
36
  "scale_dtype": "torch.float16",
37
  "seqlen": 2048,
38
- "sym": false,
39
- "train_bs": 8
 
40
  },
41
  "rms_norm_eps": 1e-06,
42
  "rope_theta": 1000000.0,
43
- "sliding_window": 131072,
44
  "tie_word_embeddings": false,
45
  "torch_dtype": "float16",
46
- "transformers_version": "4.40.0",
47
  "use_cache": true,
48
  "use_sliding_window": false,
49
  "vocab_size": 152064
 
18
  "num_key_value_heads": 4,
19
  "quantization_config": {
20
  "amp": true,
21
+ "autoround_version": "0.3.1.dev",
 
22
  "bits": 4,
23
+ "damp_percent": 0.01,
24
  "data_type": "int",
25
+ "desc_act": false,
26
  "enable_minmax_tuning": true,
27
+ "enable_norm_bias_tuning": false,
28
  "enable_quanted_input": true,
29
  "gradient_accumulate_steps": 1,
30
  "group_size": 128,
 
32
  "low_gpu_mem_usage": false,
33
  "lr": 0.001,
34
  "minmax_lr": 0.001,
35
+ "nsamples": 512,
36
+ "quant_block_list": null,
37
+ "quant_method": "gptq",
38
  "scale_dtype": "torch.float16",
39
  "seqlen": 2048,
40
+ "sym": true,
41
+ "train_bs": 8,
42
+ "true_sequential": false
43
  },
44
  "rms_norm_eps": 1e-06,
45
  "rope_theta": 1000000.0,
46
+ "sliding_window": null,
47
  "tie_word_embeddings": false,
48
  "torch_dtype": "float16",
49
+ "transformers_version": "4.44.2",
50
  "use_cache": true,
51
  "use_sliding_window": false,
52
  "vocab_size": 152064
generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "bos_token_id": 151643,
3
  "eos_token_id": 151643,
4
  "max_new_tokens": 2048,
5
- "transformers_version": "4.40.0"
6
  }
 
2
  "bos_token_id": 151643,
3
  "eos_token_id": 151643,
4
  "max_new_tokens": 2048,
5
+ "transformers_version": "4.44.2"
6
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8859aa3ec2bd85f859fe7cf264fafa2a75f912ae9f6e1fa8a739d490dc64345
3
- size 5575381304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e862995baefeb5e7281c07fa1011889b579e73bd2d9804faa30034845c1ac70
3
+ size 5573365008