---
datasets:
- Open-Orca/OpenOrca
language:
- en
library_name: transformers
pipeline_tag: text-generation
---

# Overview

Unreleased, untested, unfinished beta.

# Inference

Remove `.to('cuda')` to run unaccelerated on CPU.
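If you prefer not to edit the calls by hand, a minimal sketch (using only the standard `torch.cuda` API) picks the device at runtime instead:

```python
import torch

# Use the GPU when one is available, otherwise fall back to CPU,
# then replace every .to('cuda') below with .to(device).
device = "cuda" if torch.cuda.is_available() else "cpu"
```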
```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Load the model in bfloat16; trust_remote_code is needed for the custom phi code.
model = AutoModelForCausalLM.from_pretrained(
    "Open-Orca/oo-phi-1_5",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
).to('cuda')
# Note: torch_dtype applies to model weights, not the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(
    "Open-Orca/oo-phi-1_5",
    trust_remote_code=True,
)

sys_prompt = (
    "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning. "
    "I am an assistant who thinks through their answers step-by-step to be sure I always get the right answer. "
    "I think more clearly if I write out my thought process in a scratchpad manner first; therefore, I always "
    "explain background context, assumptions, and step-by-step thinking BEFORE trying to answer a question."
)
prompt = "Tell me about yourself please."

# Assemble the prompt in ChatML format: <|im_start|>{role}\n{content}<|im_end|>\n
prefix = "<|im_start|>"
suffix = "<|im_end|>\n"
sys_format = prefix + "system\n" + sys_prompt + suffix
user_format = prefix + "user\n" + prompt + suffix
assistant_format = prefix + "assistant\n"
input_text = sys_format + user_format + assistant_format

# Near-deterministic sampling: very low temperature with nucleus sampling.
generation_config = GenerationConfig(
    max_length=512, temperature=0.01, top_p=0.95, repetition_penalty=1.1,
    do_sample=True, use_cache=True,
    eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.pad_token_id,
    transformers_version="4.33.1",
)

inputs = tokenizer(input_text, return_tensors="pt", return_attention_mask=False).to('cuda')
outputs = model.generate(**inputs, generation_config=generation_config)

text = tokenizer.batch_decode(outputs)[0]
print(text)
```
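
The decode above prints the full sequence, prompt included, since `generate` returns the input tokens followed by the completion. As a small follow-up sketch (reusing the `inputs` and `outputs` variables from above), you can slice off the prompt to print only the model's reply:

```python
# Keep only the tokens generated after the prompt, then strip special tokens.
new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
reply = tokenizer.decode(new_tokens, skip_special_tokens=True)
print(reply)
```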