Improve code snippet (#3)

Commits:
- Improve code snippet (c4f91327b0b9f51cee01f41f9da8ed195e030378)
- Update README.md (c15b81007348c72c96b834df15b62bc57dc62965)
- Update README.md (d9b36fe104aaadadd78b092dc6f6b6e0263c3df4)

README.md CHANGED

````diff
@@ -102,18 +102,15 @@ prompt = [
 ]
 
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-
-inputs = tokenizer.apply_chat_template(prompt, tokenize=True, add_generation_prompt=True, return_tensors="pt").cuda()
-
 model = AutoAWQForCausalLM.from_pretrained(
     model_id,
     torch_dtype=torch.float16,
     low_cpu_mem_usage=True,
     device_map="auto",
-    fuse_layers=True,
 )
 
-
+inputs = tokenizer.apply_chat_template(prompt, tokenize=True, add_generation_prompt=True, return_tensors="pt", return_dict=True).to('cuda')
+outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
 print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
 ```
 
````
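In short, the change loads the model before tokenizing, tokenizes with `return_dict=True` so the result can be unpacked into `generate`, drops `fuse_layers=True` from the `from_pretrained` call, and adds the `model.generate` call that the old snippet's final `print(...)` line needed but never defined.

For reference, here is a minimal sketch of how the full snippet reads after this change. Only the lines visible in the hunk are taken verbatim; the imports, the `model_id` value, and the `prompt` messages sit above line 102 of the README and are assumed here so the example is self-contained.

```python
# Sketch of the post-change snippet. The model_id value and the prompt
# messages are placeholders (assumptions); the diff hunk begins after
# their definitions, so only the remaining lines come from the diff.
import torch
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_id = "path/to/awq-model"  # hypothetical; substitute the actual repo id

# Chat-style messages, as implied by apply_chat_template (contents assumed).
prompt = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Tell me about AI."},
]

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoAWQForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    device_map="auto",
)

# return_dict=True makes apply_chat_template return a BatchEncoding holding
# both input_ids and attention_mask; .to('cuda') moves all of it to the GPU,
# and **inputs unpacks it into generate(). The old call returned a bare
# tensor, which .cuda() could move but **inputs could not unpack.
inputs = tokenizer.apply_chat_template(
    prompt,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
).to('cuda')

# The generate() call added by this change defines the `outputs` that the
# print line below always referenced.
outputs = model.generate(**inputs, do_sample=True, max_new_tokens=256)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```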