Commit 5b3a0a8 by lrl-modelcloud
1 Parent(s): 854b0c5

Update README.md

Files changed (1)
  1. README.md +1 -10
README.md CHANGED
@@ -24,7 +24,6 @@ os.environ['VLLM_ATTENTION_BACKEND'] = 'FLASHINFER'
 
 from transformers import AutoTokenizer
 from gptqmodel import BACKEND, GPTQModel
-from vllm import SamplingParams
 
 model_name = "ModelCloud/gemma-2-27b-it-gptq-4bit"
 
@@ -37,15 +36,7 @@ model = GPTQModel.from_quantized(
     backend=BACKEND.VLLM,
 )
 
-sampling_params = SamplingParams(temperature=0.95, max_tokens=128)
-
 inputs = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
-
-outputs = model.generate(
-    prompts=inputs,
-    sampling_params=sampling_params,
-)
-
+outputs = model.generate(prompts=inputs,)
 print(outputs[0].outputs[0].text)
-
 ```
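
For reference, the README's quick-start example after this commit reads roughly as follows. This is a sketch reconstructed from the diff hunks above: the definition of `prompt`, the tokenizer construction, and the arguments to `from_quantized()` other than `backend` fall outside the visible context lines, so those parts are assumptions.

```python
import os

# From the first hunk header: vLLM is told to use the FlashInfer attention backend.
os.environ['VLLM_ATTENTION_BACKEND'] = 'FLASHINFER'

from transformers import AutoTokenizer
from gptqmodel import BACKEND, GPTQModel

model_name = "ModelCloud/gemma-2-27b-it-gptq-4bit"

# Assumption: `prompt` is defined earlier in the README, outside this diff's
# context lines; a minimal chat-style message list stands in for it here.
prompt = [{"role": "user", "content": "Hello!"}]

# Assumption: the tokenizer construction is not shown in the diff, but
# `tokenizer.apply_chat_template` below implies something like this.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the 4-bit GPTQ checkpoint through GPTQModel's vLLM backend. Only the
# `backend=` argument is visible in the diff context; passing `model_name`
# as the first argument is an assumption.
model = GPTQModel.from_quantized(
    model_name,
    backend=BACKEND.VLLM,
)

inputs = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)

# After this commit the example no longer builds vLLM's SamplingParams
# explicitly, so generation relies on the backend's default sampling settings.
outputs = model.generate(prompts=inputs,)
print(outputs[0].outputs[0].text)
```

The net effect of the commit is that the example no longer imports `vllm` directly: dropping the explicit `SamplingParams(temperature=0.95, max_tokens=128)` leaves sampling at the backend's defaults and collapses the `model.generate(...)` call to a single line.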