Update README.md
README.md
@@ -106,6 +106,31 @@ messages = [
 tokens = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
 ```
 
+## Inference using Transformers
+
+```python
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+
+model_id = "openchat/openchat-3.6-8b-20240522"
+
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
+
+messages = [
+    {"role": "user", "content": "Explain how large language models work in detail."},
+]
+input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
+
+outputs = model.generate(input_ids,
+    do_sample=True,
+    temperature=0.6,
+    top_p=0.9,
+)
+response = outputs[0][input_ids.shape[-1]:]
+print(tokenizer.decode(response, skip_special_tokens=True))
+```
+
 <div align="center">
 <h2> Limitations </h2>
 </div>
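
For readers who prefer the higher-level API, the same generation can be run through `transformers.pipeline`. This is a minimal sketch, not part of the commit above; it assumes a recent `transformers` release whose text-generation pipeline accepts chat-style message lists and applies the model's chat template automatically, and the `max_new_tokens=512` value is an illustrative choice.

```python
from transformers import pipeline
import torch

# Build a text-generation pipeline for the same checkpoint.
pipe = pipeline(
    "text-generation",
    model="openchat/openchat-3.6-8b-20240522",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

messages = [
    {"role": "user", "content": "Explain how large language models work in detail."},
]

# Passing a message list makes the pipeline apply the chat template itself.
out = pipe(messages, max_new_tokens=512, do_sample=True, temperature=0.6, top_p=0.9)

# With chat input, generated_text holds the whole conversation;
# the last entry is the assistant's reply.
print(out[0]["generated_text"][-1]["content"])
```

Note that the `model.generate` call in the diff leaves output length to the model's default generation config; passing `max_new_tokens` explicitly, as in this sketch, bounds the response length.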