File size: 777 Bytes
c46937d 204da06 f051168 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
---
license: apache-2.0
---
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModel
import torch
# Hub repository that provides both the model weights and the tokenizer.
MODEL_ID = "orionweller/test-flex-gpt"

# Prefer the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# NOTE(review): trust_remote_code=True runs Python code shipped in the Hub
# repository; keep it only for repositories you trust.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# Encode a short prompt and move every returned tensor to the model's device.
prompt = "The capital of France is"
encoded = tokenizer(prompt, return_tensors="pt").to(device)
inputs = encoded.input_ids

# Forward the tokenizer's attention mask (when it returns one) so generate()
# does not have to infer it from the input ids alone.
outputs = model.generate(
    inputs,
    attention_mask=encoded.get("attention_mask"),
    max_new_tokens=5,
    do_sample=True,
    top_p=0.95,
)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))