Millakisan committed on
Commit 9c911bc · verified · 1 Parent(s): c8e45e5

Update app.py

Files changed (1)
  1. app.py +28 -5
app.py CHANGED
@@ -1,7 +1,30 @@
- from transformers import pipeline
- import gradio as gr

- pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es")

- demo = gr.Interface.from_pipeline(pipe)
- demo.launch()
+ # Generate a chat response with OLMo-7B-Instruct
+ import logging

+ from hf_olmo import OLMoForCausalLM, OLMoTokenizerFast

+ # Enable logging
+ logging.basicConfig(
+     format="%(asctime)s - %(name)s - %(lineno)s - %(funcName)s - %(levelname)s - %(message)s",
+     level=logging.INFO
+ )
+ # Set a higher logging level for httpx to avoid logging every GET and POST request
+ logging.getLogger("httpx").setLevel(logging.WARNING)
+
+ logger = logging.getLogger(__name__)
+
+ MODEL = "allenai/OLMo-7B-Instruct"
+
+ olmo = OLMoForCausalLM.from_pretrained(MODEL)
+ tokenizer = OLMoTokenizerFast.from_pretrained(MODEL)
+ chat = [
+     {"role": "user", "content": "What is language modeling?"},
+ ]
+ prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+ inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
+ # Optional: move the model to CUDA (the input ids are sent to olmo.device below)
+ # olmo = olmo.to('cuda')
+ response = olmo.generate(input_ids=inputs.to(olmo.device), max_new_tokens=100, do_sample=True, top_k=50, top_p=0.95)
+ print(tokenizer.batch_decode(response, skip_special_tokens=True)[0])
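
The new app.py prints one sampled completion and exits, whereas the removed code served a Gradio UI (which is what a Space normally runs). Below is a minimal sketch of how the same generation could be wrapped back into a Gradio interface, reusing the olmo and tokenizer objects loaded above; the generate_reply helper and the plain text-in/text-out gr.Interface wiring are illustrative assumptions, not part of this commit.

import gradio as gr

def generate_reply(message: str) -> str:
    # Build a single-turn chat prompt with the model's chat template.
    chat = [{"role": "user", "content": message}]
    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    input_ids = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt")
    # Sample up to 100 new tokens with the same settings as the script above.
    output = olmo.generate(
        input_ids=input_ids.to(olmo.device),
        max_new_tokens=100,
        do_sample=True,
        top_k=50,
        top_p=0.95,
    )
    # Decode the full sequence (prompt plus completion), as the script above does.
    return tokenizer.batch_decode(output, skip_special_tokens=True)[0]

demo = gr.Interface(fn=generate_reply, inputs="text", outputs="text")
demo.launch()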