Lin-K76 committed on
Commit
b3794bc
1 Parent(s): cc9f607

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -1
README.md CHANGED
@@ -84,11 +84,16 @@ ds = load_dataset("mgoin/ultrachat_2k", split="train_sft").select(range(512))
84
  examples = [tokenizer.apply_chat_template(batch["messages"], tokenize=False) for batch in ds]
85
  examples = tokenizer(examples, padding=True, truncation=True, return_tensors="pt").to("cuda")
86
 
87
- quantize_config = BaseQuantizeConfig(quant_method="fp8", activation_scheme="static")
 
 
 
 
88
 
89
  model = AutoFP8ForCausalLM.from_pretrained(
90
  pretrained_model_dir, quantize_config=quantize_config
91
  )
 
92
  model.quantize(examples)
93
  model.save_quantized(quantized_model_dir)
94
  ```
 
84
  examples = [tokenizer.apply_chat_template(batch["messages"], tokenize=False) for batch in ds]
85
  examples = tokenizer(examples, padding=True, truncation=True, return_tensors="pt").to("cuda")
86
 
87
+ quantize_config = BaseQuantizeConfig(
88
+ quant_method="fp8",
89
+ activation_scheme="dynamic", # or "static"
90
+ ignore_patterns=["re:.*lm_head"],
91
+ )
92
 
93
  model = AutoFP8ForCausalLM.from_pretrained(
94
  pretrained_model_dir, quantize_config=quantize_config
95
  )
96
+
97
  model.quantize(examples)
98
  model.save_quantized(quantized_model_dir)
99
  ```