VMware
/

open-llama-7b-open-instruct

Text Generation

text-generation-inference

Inference Endpoints

Model card | Files and versions | Community

rbattle committed on Jun 12, 2023

Commit

0278fdb

•

1 Parent(s): 6a94432

Update README.md

Files changed (1) hide show

README.md +4 -4

README.md CHANGED Viewed

@@ -33,16 +33,16 @@ import os
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-model_name = 'VMware/open-llama-7B-open-instruct'
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype= torch.float16, device_map = 'sequential')
 prompt_template = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:"
-prompt=  'Explain in simple terms how the attention mechanism of a transformer model works'
 inputt = prompt_template.format(instruction= prompt)
@@ -51,7 +51,7 @@ input_ids = tokenizer(inputt, return_tensors="pt").input_ids.to("cuda")
 output1 = model.generate(input_ids, max_length=512)
 input_length = input_ids.shape[1]
 output1 = output1[:, input_length:]
-output= tokenizer.decode(output1[0])
 print(output)

 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+model_name = 'VMware/open-llama-7b-open-instruct'
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map='sequential')
 prompt_template = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:"
+prompt = 'Explain in simple terms how the attention mechanism of a transformer model works'
 inputt = prompt_template.format(instruction= prompt)
 output1 = model.generate(input_ids, max_length=512)
 input_length = input_ids.shape[1]
 output1 = output1[:, input_length:]
+output = tokenizer.decode(output1[0])
 print(output)