helenai committed
Commit: 4d562b1
Parent(s): 1501889

Update README.md

Files changed (1):
  1. README.md (+19 -8)
README.md CHANGED
@@ -11,15 +11,26 @@ This is the [ibm-granite/granite-8b-code-instruct](https://huggingface.co/ibm-gr
 
 An example of how to do inference on this model:
 ```python
+from transformers import AutoTokenizer
 from optimum.intel import OVModelForCausalLM
-from transformers import AutoTokenizer, pipeline
 
-# model_id should be set to either a local directory or a model available on the HuggingFace hub.
-model_id = "helenai/ibm-granite-granite-8b-code-instruct-ov"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = OVModelForCausalLM.from_pretrained(model_id)
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
-result = pipe("hello world")
-print(result)
+model_path = "helenai/ibm-granite-granite-8b-code-instruct-ov"
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model = OVModelForCausalLM.from_pretrained(model_path)
+
+# change input text as desired
+chat = [
+    { "role": "user", "content": "Write a code to find the maximum value in a list of numbers." },
+]
+chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+# tokenize the text
+input_tokens = tokenizer(chat, return_tensors="pt")
+# generate output tokens
+output = model.generate(**input_tokens, max_new_tokens=100)
+# decode output tokens into text
+output = tokenizer.batch_decode(output)
+# loop over the batch to print, in this example the batch size is 1
+for i in output:
+    print(i)
 ```
 
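For reference, the snippet removed in this commit used transformers' `pipeline` helper rather than calling `generate` directly. That style still works with optimum-intel's OpenVINO models, since `OVModelForCausalLM` can be passed to a `text-generation` pipeline like a regular transformers model. A minimal sketch of that alternative (not part of this commit, reusing the chat template call from the updated README):

```python
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, pipeline

model_id = "helenai/ibm-granite-granite-8b-code-instruct-ov"

# Load the tokenizer and the OpenVINO model from the Hub (or a local directory).
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = OVModelForCausalLM.from_pretrained(model_id)

# OVModelForCausalLM can be used with the standard text-generation pipeline.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Build the prompt with the model's chat template, as the updated README does.
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Write a code to find the maximum value in a list of numbers."}],
    tokenize=False,
    add_generation_prompt=True,
)

result = pipe(prompt, max_new_tokens=100)
print(result[0]["generated_text"])
```

The explicit tokenize/generate/decode flow in the updated README makes each step visible; the pipeline variant is shorter but hides those steps.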