winglian committed
Commit faecff9
1 Parent(s): aa656e0

support to disable exllama for gptq (#604)


* support to disable exllama for gptq

* update property instead of item

* fix config key

examples/llama-2/gptq-lora.yml CHANGED
@@ -2,7 +2,7 @@ base_model: TheBloke/Llama-2-7B-GPTQ
 base_model_config: TheBloke/Llama-2-7B-GPTQ
 is_llama_derived_model: false
 gptq: true
-gptq_bits: 4
+gptq_disable_exllama: true
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
 tokenizer_use_fast: true
@@ -62,8 +62,6 @@ xformers_attention:
 flash_attention:
 sdp_attention:
 flash_optimum:
-gptq_groupsize:
-gptq_model_v1:
 warmup_steps: 100
 eval_steps:
 save_steps:
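
For reference, a minimal sketch of what this option corresponds to when loading a GPTQ model directly with transformers: the exllama kernels are turned off via the disable_exllama flag on GPTQConfig. The model name and bit width below simply mirror the example config above, and this assumes a transformers version that still exposes disable_exllama (later releases renamed the option to use_exllama).

# Minimal sketch (not axolotl code): load a GPTQ-quantized model with the
# exllama kernels disabled, which is the effect gptq_disable_exllama: true aims for.
from transformers import AutoModelForCausalLM, GPTQConfig

quantization_config = GPTQConfig(bits=4, disable_exllama=True)

model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-7B-GPTQ",
    quantization_config=quantization_config,
    device_map="auto",
)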
src/axolotl/utils/models.py CHANGED
@@ -196,6 +196,10 @@ def load_model(
     if not hasattr(model_config, "quantization_config"):
         LOG.warning("model config does not contain quantization_config information")
     else:
+        if cfg.gptq_disable_exllama is not None:
+            model_config.quantization_config[
+                "disable_exllama"
+            ] = cfg.gptq_disable_exllama
         model_kwargs["quantization_config"] = GPTQConfig(
             **model_config.quantization_config
         )
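
As a rough standalone illustration of the new code path (the cfg attribute and the hard-coded value below are simplified stand-ins, not axolotl's actual objects): the quantization_config dict already stored in the base model's config is overridden with the user's disable_exllama choice before being rebuilt into a GPTQConfig, which is what load_model now does when gptq_disable_exllama is set.

# Standalone sketch of the override added above: take the quantization_config
# dict from the base model's config, force disable_exllama when requested,
# and rebuild a GPTQConfig from the merged dict.
from transformers import AutoConfig, GPTQConfig

model_config = AutoConfig.from_pretrained("TheBloke/Llama-2-7B-GPTQ")

gptq_disable_exllama = True  # stand-in for cfg.gptq_disable_exllama
if gptq_disable_exllama is not None:
    model_config.quantization_config["disable_exllama"] = gptq_disable_exllama

model_kwargs = {
    "quantization_config": GPTQConfig(**model_config.quantization_config)
}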