Update app.py
app.py
CHANGED
@@ -1,7 +1,30 @@
+#
+import logging

+from hf_olmo import OLMoForCausalLM, OLMoTokenizerFast

+# Enable logging
+logging.basicConfig(
+    format="%(asctime)s - %(name)s - %(lineno)s - %(funcName)s - %(levelname)s - %(message)s",
+    level=logging.INFO
+)
+# set higher logging level for httpx to avoid all GET and POST requests being logged
+logging.getLogger("httpx").setLevel(logging.WARNING)
+
+logger = logging.getLogger(__name__)
+
+MODEL = "allenai/OLMo-7B-Instruct"
+
+olmo = OLMoForCausalLM.from_pretrained(MODEL)
+tokenizer = OLMoTokenizerFast.from_pretrained(MODEL)
+chat = [
+    {"role": "user",
+     "content": "What is language modeling?"},
+]
+prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
+# optional verifying cuda
+# inputs = {k: v.to('cuda') for k,v in inputs.items()}
+# olmo = olmo.to('cuda')
+response = olmo.generate(input_ids=inputs.to(olmo.device), max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95)
+print(tokenizer.batch_decode(response, skip_special_tokens=True)[0])