thwin27 committed on
Commit
7652df9
1 Parent(s): ee960b9

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +27 -1
README.md CHANGED
@@ -2,4 +2,30 @@
2
  license: apache-2.0
3
  base_model:
4
  - rhymes-ai/Aria-sequential_mlp
5
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  license: apache-2.0
3
  base_model:
4
  - rhymes-ai/Aria-sequential_mlp
5
+ ---
6
+ # Aria-sequential_mlp-FP8-dynamic
7
+
8
+ FP8-Dynamic quantization of rhymes-ai Aria, made with llm-compressor.
9
+
10
+ Generated with the following code:
11
+
12
+ ```python
13
+ from transformers import AutoProcessor, AutoModelForCausalLM
14
+ from llmcompressor.modifiers.quantization import QuantizationModifier
15
+ from llmcompressor.transformers import SparseAutoModelForCausalLM, oneshot
16
+
17
+ model_name = "rhymes-ai/Aria-sequential_mlp"
18
+
19
+ model = SparseAutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype="auto", trust_remote_code=True)
20
+ processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
21
+
22
+ recipe = QuantizationModifier(
23
+ targets="Linear",
24
+ scheme="FP8_DYNAMIC",
25
+ ignore=["re:.*lm_head", "re:multi_modal_projector.*", "re:vision_tower.*"],
26
+ )
27
+
28
+ folder = model_name.split("/")[1] + "-FP8-Dynamic"
29
+ oneshot(model=model, recipe=recipe, output_dir=folder)
30
+ processor.save_pretrained(folder)
31
+ ```