RuterNorway committed
Commit · da85060
Parent(s): d7df571

Fixed AutoGPTQ example code
README.md CHANGED
````diff
@@ -189,30 +189,25 @@ Then try the following example code:
 ```python
 from transformers import AutoTokenizer, pipeline, logging
 from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
+# model_name_or_path = "RuterNorway/Llama-2-13b-chat-norwegian-GPTQ"
 model_name_or_path = "RuterNorway/Llama-2-13b-chat-norwegian-GPTQ"
 model_basename = "gptq_model-4bit-128g"
 use_triton = False
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
+quantize_config = None
 model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
-        model_basename=model_basename
-        use_safetensors=True,
-        trust_remote_code=True,
-        device="cuda:0",
-        use_triton=use_triton,
-        quantize_config=None)
-"""
-To download from a specific branch, use the revision parameter, as in this example:
-model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
-        revision="gptq-4bit-32g-actorder_True",
         model_basename=model_basename,
         use_safetensors=True,
         trust_remote_code=True,
         device="cuda:0",
-
-
-
-
-
+        use_triton=use_triton,
+        quantize_config=quantize_config)
+
+instruction = "Gi en vurdering (positiv/negativ) og 4 stikkord som forklarer vurderingen. Svar i dette formatet: vurdering: positiv/negativ \n,stikkord: \n"
+input = "Bussjåføren på Snarøya 31 (12.26 bussen på Årvoll senter) som var på vei ut av holdeplassen men venta da han så jeg løp til bussen og ikke var langt unna. You made my day!"
+prompt_template=f'''### Instruction: {instruction}
+### Input: {input}
+### Response:
 '''
 print("\n\n*** Generate:")
 input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
@@ -221,10 +216,11 @@ print(tokenizer.decode(output[0]))
 # Inference can also be done using transformers' pipeline
 # Prevent printing spurious transformers error when using pipeline with AutoGPTQ
 logging.set_verbosity(logging.CRITICAL)
-print("*** Pipeline
+print("\n\n*** Pipeline:\n\n")
 pipe = pipeline(
     "text-generation",
     model=model,
+    do_sample=True,
     tokenizer=tokenizer,
     max_new_tokens=512,
     temperature=0.7,
````