OPEA
/

Safetensors
qwen2_vl
4-bit precision
intel/auto-round
weiweiz1 committed on
Commit
61faab7
1 Parent(s): e67cae7

auto_round format

Browse files

Signed-off-by: Zhang, Weiwei1 <weiwei1.zhang@intel.com>

config.json CHANGED
@@ -20,11 +20,11 @@
20
  "quantization_config": {
21
  "amp": true,
22
  "autoround_version": "0.4.2.dev",
 
23
  "batch_size": 8,
24
  "bits": 4,
25
- "damp_percent": 0.01,
26
  "data_type": "int",
27
- "desc_act": false,
28
  "enable_minmax_tuning": true,
29
  "enable_norm_bias_tuning": false,
30
  "enable_quanted_input": true,
@@ -35,12 +35,11 @@
35
  "lr": 0.001,
36
  "minmax_lr": 0.001,
37
  "nsamples": 512,
38
- "quant_method": "gptq",
39
  "scale_dtype": "torch.float16",
40
  "seqlen": 2048,
41
  "sym": true,
42
- "to_quant_block_names": "model.layers",
43
- "true_sequential": false
44
  },
45
  "rms_norm_eps": 1e-06,
46
  "rope_scaling": {
 
20
  "quantization_config": {
21
  "amp": true,
22
  "autoround_version": "0.4.2.dev",
23
+ "backend": "auto_round:gptq:exllamav2",
24
  "batch_size": 8,
25
  "bits": 4,
 
26
  "data_type": "int",
27
+ "dataset": "NeelNanda/pile-10k",
28
  "enable_minmax_tuning": true,
29
  "enable_norm_bias_tuning": false,
30
  "enable_quanted_input": true,
 
35
  "lr": 0.001,
36
  "minmax_lr": 0.001,
37
  "nsamples": 512,
38
+ "quant_method": "intel/auto-round",
39
  "scale_dtype": "torch.float16",
40
  "seqlen": 2048,
41
  "sym": true,
42
+ "to_quant_block_names": "model.layers"
 
43
  },
44
  "rms_norm_eps": 1e-06,
45
  "rope_scaling": {
quantize_config.json → quantization_config.json RENAMED
@@ -17,9 +17,8 @@
17
  "low_gpu_mem_usage": true,
18
  "to_quant_block_names": "model.layers",
19
  "enable_norm_bias_tuning": false,
 
20
  "autoround_version": "0.4.2.dev",
21
- "quant_method": "gptq",
22
- "desc_act": false,
23
- "true_sequential": false,
24
- "damp_percent": 0.01
25
  }
 
17
  "low_gpu_mem_usage": true,
18
  "to_quant_block_names": "model.layers",
19
  "enable_norm_bias_tuning": false,
20
+ "dataset": "NeelNanda/pile-10k",
21
  "autoround_version": "0.4.2.dev",
22
+ "quant_method": "intel/auto-round",
23
+ "backend": "auto_round:gptq:exllamav2"
 
 
24
  }