helenai committed
Commit 936656b
1 Parent(s): 4d562b1

Update inference.py

Files changed (1)
  1. inference.py +19 -8
inference.py CHANGED
@@ -1,10 +1,21 @@
+ from transformers import AutoTokenizer
  from optimum.intel import OVModelForCausalLM
- from transformers import AutoTokenizer, pipeline
 
- # model_id should be set to either a local directory or a model available on the HuggingFace hub.
- model_id = "helenai/ibm-granite-granite-8b-code-instruct-ov"
- tokenizer = AutoTokenizer.from_pretrained(model_id)
- model = OVModelForCausalLM.from_pretrained(model_id)
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
- result = pipe("hello world")
- print(result)
+ model_path = "helenai/ibm-granite-granite-8b-code-instruct-ov"
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
+ model = OVModelForCausalLM.from_pretrained(model_path)
+
+ # change input text as desired
+ chat = [
+     { "role": "user", "content": "Write a code to find the maximum value in a list of numbers." },
+ ]
+ chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+ # tokenize the text
+ input_tokens = tokenizer(chat, return_tensors="pt")
+ # generate output tokens
+ output = model.generate(**input_tokens, max_new_tokens=100)
+ # decode output tokens into text
+ output = tokenizer.batch_decode(output)
+ # loop over the batch to print, in this example the batch size is 1
+ for i in output:
+     print(i)
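
Note on the new decoding step: tokenizer.batch_decode(output) returns the echoed prompt and any special tokens along with the completion. A minimal sketch of a stricter variant, assuming only the newly generated text is wanted (not part of this commit; output_ids, prompt_length, and generated are illustrative names, and tokenizer, model, and input_tokens come from the script above):

# sketch: decode only the tokens produced after the prompt
output_ids = model.generate(**input_tokens, max_new_tokens=100)
# generate() returns prompt + completion for decoder-only models, so slice off the prompt
prompt_length = input_tokens["input_ids"].shape[1]
generated = tokenizer.batch_decode(output_ids[:, prompt_length:], skip_special_tokens=True)
for text in generated:
    print(text)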