nm-testing
/

TinyLlama-1.1B-Chat-v1.0-kv_cache_default_tinyllama-e2e

nm-autobot committed 14 days ago

Commit

38b7693

verified ·

1 Parent(s): 12e8507

Upload folder using huggingface_hub

Files changed (1) hide show

recipe.yaml CHANGED Viewed

@@ -2,10 +2,3 @@ quant_stage:
   quant_modifiers:
     QuantizationModifier:
       kv_cache_scheme: {num_bits: 8, type: float, symmetric: true, strategy: tensor}
-    GPTQModifier:
-      sequential_update: false
-      ignore: [lm_head]
-      config_groups:
-        group_0:
-          weights: {num_bits: 4, type: int, symmetric: true, strategy: channel, actorder: false}
-          targets: [Linear]

   quant_modifiers:
     QuantizationModifier:
       kv_cache_scheme: {num_bits: 8, type: float, symmetric: true, strategy: tensor}