RuterNorway committed on
Commit
da85060
·
1 Parent(s): d7df571

Fixed AutoGPTQ example code

Browse files
Files changed (1) hide show
  1. README.md +12 -16
README.md CHANGED
@@ -189,30 +189,25 @@ Then try the following example code:
189
  ```python
190
  from transformers import AutoTokenizer, pipeline, logging
191
  from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
 
192
  model_name_or_path = "RuterNorway/Llama-2-13b-chat-norwegian-GPTQ"
193
  model_basename = "gptq_model-4bit-128g"
194
  use_triton = False
195
  tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
 
196
  model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
197
- model_basename=model_basename
198
- use_safetensors=True,
199
- trust_remote_code=True,
200
- device="cuda:0",
201
- use_triton=use_triton,
202
- quantize_config=None)
203
- """
204
- To download from a specific branch, use the revision parameter, as in this example:
205
- model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
206
- revision="gptq-4bit-32g-actorder_True",
207
  model_basename=model_basename,
208
  use_safetensors=True,
209
  trust_remote_code=True,
210
  device="cuda:0",
211
- quantize_config=None)
212
- """
213
- prompt = "Fortell meg om AI"
214
- prompt_template=f'''### Human: {prompt}
215
- ### Assistant:
 
 
 
216
  '''
217
  print("\n\n*** Generate:")
218
  input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
@@ -221,10 +216,11 @@ print(tokenizer.decode(output[0]))
221
  # Inference can also be done using transformers' pipeline
222
  # Prevent printing spurious transformers error when using pipeline with AutoGPTQ
223
  logging.set_verbosity(logging.CRITICAL)
224
- print("*** Pipeline:")
225
  pipe = pipeline(
226
  "text-generation",
227
  model=model,
 
228
  tokenizer=tokenizer,
229
  max_new_tokens=512,
230
  temperature=0.7,
 
189
  ```python
190
  from transformers import AutoTokenizer, pipeline, logging
191
  from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
192
+ # model_name_or_path = "RuterNorway/Llama-2-13b-chat-norwegian-GPTQ"
193
  model_name_or_path = "RuterNorway/Llama-2-13b-chat-norwegian-GPTQ"
194
  model_basename = "gptq_model-4bit-128g"
195
  use_triton = False
196
  tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
197
+ quantize_config = None
198
  model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
 
 
 
 
 
 
 
 
 
 
199
  model_basename=model_basename,
200
  use_safetensors=True,
201
  trust_remote_code=True,
202
  device="cuda:0",
203
+ use_triton=use_triton,
204
+ quantize_config=quantize_config)
205
+
206
+ instruction = "Gi en vurdering (positiv/negativ) og 4 stikkord som forklarer vurderingen. Svar i dette formatet: vurdering: positiv/negativ \n,stikkord: \n"
207
+ input = "Bussjåføren på Snarøya 31 (12.26 bussen på Årvoll senter) som var på vei ut av holdeplassen men venta da han så jeg løp til bussen og ikke var langt unna. You made my day!"
208
+ prompt_template=f'''### Instruction: {instruction}
209
+ ### Input: {input}
210
+ ### Response:
211
  '''
212
  print("\n\n*** Generate:")
213
  input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
 
216
  # Inference can also be done using transformers' pipeline
217
  # Prevent printing spurious transformers error when using pipeline with AutoGPTQ
218
  logging.set_verbosity(logging.CRITICAL)
219
+ print("\n\n*** Pipeline:\n\n")
220
  pipe = pipeline(
221
  "text-generation",
222
  model=model,
223
+ do_sample=True,
224
  tokenizer=tokenizer,
225
  max_new_tokens=512,
226
  temperature=0.7,