pankajmathur committed
Commit 18e1081 (parent: 32807fe)

updated sample code

Files changed (1):
  1. README.md +26 -13
README.md CHANGED
@@ -39,39 +39,52 @@ Hello Orca Mini, what can you do for me?<|eot_id|>
 <|start_header_id|>assistant<|end_header_id|>
 ```
 
-Below shows a code example on how to use this model in default(bf16) format
+Below is a code example showing how to use this model in its default full precision (bf16) format; it requires roughly 140 GB of GPU memory for the 70B weights alone
 
 ```python
-from transformers import AutoModel, AutoTokenizer
+import torch
+from transformers import pipeline
 
 model_slug = "pankajmathur/orca_mini_v8_0_70b"
-model = AutoModel.from_pretrained(model_slug)
-tokenizer = AutoTokenizer.from_pretrained(model_slug)
+
+# Load in bfloat16 and shard across all available GPUs
+pipe = pipeline(
+    "text-generation",
+    model=model_slug,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+)
+
 messages = [
     {"role": "system", "content": "You are Orca Mini, a helpful AI assistant."},
     {"role": "user", "content": "Hello Orca Mini, what can you do for me?"}
 ]
-gen_input = tokenizer.apply_chat_template(messages, return_tensors="pt")
-model.generate(**gen_input)
+outputs = pipe(messages, max_new_tokens=128, do_sample=True, temperature=0.01, top_k=100, top_p=0.95)
+print(outputs[0]["generated_text"][-1])
 ```
 
-Below shows a code example on how to use this model in 8-bit format via bitsandbytes library
+Below is a code example showing how to use this model in 4-bit (NF4) quantized format via the bitsandbytes library
 
 ```python
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from transformers import BitsAndBytesConfig, pipeline
 
 model_slug = "pankajmathur/orca_mini_v8_0_70b"
-quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-quantized_model = AutoModelForCausalLM.from_pretrained(
-    model_slug, device_map="auto", torch_dtype=torch.bfloat16, quantization_config=quantization_config)
-tokenizer = AutoTokenizer.from_pretrained(model_slug)
+
+# 4-bit NF4 quantization with double quantization; matmuls computed in fp16
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_use_double_quant=True,
+)
+pipe = pipeline(
+    "text-generation",
+    model=model_slug,
+    model_kwargs={"quantization_config": quantization_config},
+    device_map="auto",
+)
+
 messages = [
     {"role": "system", "content": "You are Orca Mini, a helpful AI assistant."},
     {"role": "user", "content": "Hello Orca Mini, what can you do for me?"}
 ]
-gen_input = tokenizer.apply_chat_template(messages, return_tensors="pt")
-quantized_model.generate(**gen_input)
+outputs = pipe(messages, max_new_tokens=128, do_sample=True, temperature=0.01, top_k=100, top_p=0.95)
+print(outputs[0]["generated_text"][-1])
 ```
 
 Below is a code example showing how to do tool use with this model and the transformers library
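
The hunk ends here, before the README's actual tool-use example, so none of that code appears in this diff. For orientation only, below is a minimal sketch of tool calling with the transformers chat-template API; the `get_current_weather` function and its stub return value are hypothetical placeholders, not taken from the README.

```python
from transformers import AutoTokenizer

model_slug = "pankajmathur/orca_mini_v8_0_70b"
tokenizer = AutoTokenizer.from_pretrained(model_slug)

def get_current_weather(city: str) -> str:
    """Get the current weather for a city.

    Args:
        city: Name of the city to look up.
    """
    return "sunny, 22 C"  # hypothetical stub; replace with a real API call

messages = [
    {"role": "system", "content": "You are Orca Mini, a helpful AI assistant."},
    {"role": "user", "content": "What is the weather in Mumbai right now?"}
]

# apply_chat_template converts the function's signature and docstring into a
# tool schema and renders it into the prompt alongside the conversation
prompt = tokenizer.apply_chat_template(
    messages,
    tools=[get_current_weather],
    add_generation_prompt=True,
    tokenize=False,
)
print(prompt)  # inspect the rendered tool-calling prompt before generating
```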
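
A side note on the removed bf16 snippet: it loaded the checkpoint with `AutoModel`, which has no language-modeling head, so `model.generate(**gen_input)` would not have worked as written. If the manual (non-pipeline) workflow is ever wanted again, a corrected sketch would look roughly like this; it is an editorial reconstruction, not part of the commit.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_slug = "pankajmathur/orca_mini_v8_0_70b"

tokenizer = AutoTokenizer.from_pretrained(model_slug)
# AutoModelForCausalLM (not AutoModel) attaches the LM head that .generate() needs
model = AutoModelForCausalLM.from_pretrained(
    model_slug,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

messages = [
    {"role": "system", "content": "You are Orca Mini, a helpful AI assistant."},
    {"role": "user", "content": "Hello Orca Mini, what can you do for me?"}
]

# add_generation_prompt=True appends the assistant header so the model replies
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

output_ids = model.generate(input_ids, max_new_tokens=128)
# Decode only the newly generated tokens, skipping the echoed prompt
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))
```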
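
One more observation on the quantized example: the new code loads the model in 4-bit NF4, while the revision it replaces used 8-bit (`load_in_8bit=True`). If 8-bit loading is still preferred, only the config changes; a sketch under the same pipeline setup:

```python
from transformers import BitsAndBytesConfig, pipeline

model_slug = "pankajmathur/orca_mini_v8_0_70b"

# 8-bit weight quantization, matching the earlier revision's config
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

pipe = pipeline(
    "text-generation",
    model=model_slug,
    model_kwargs={"quantization_config": quantization_config},
    device_map="auto",
)
```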