---
license: apache-2.0
---

Example usage:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Use a GPU if one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load the model and tokenizer; trust_remote_code=True is required
# because this checkpoint ships custom modeling code
model = AutoModelForCausalLM.from_pretrained("orionweller/test-flex-gpt", trust_remote_code=True)
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained("orionweller/test-flex-gpt", trust_remote_code=True)

# Encode a prompt and move the input ids to the same device as the model
prompt = "The capital of France is"
inputs = tokenizer(prompt, return_tensors="pt").input_ids
inputs = inputs.to(device)

# Sample a short continuation with nucleus sampling
outputs = model.generate(inputs, max_new_tokens=5, do_sample=True, top_p=0.95)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```
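
Because `do_sample=True` draws tokens stochastically, each run can print a different continuation. For reproducible output, greedy decoding can be used instead; a minimal sketch, reusing the `model`, `tokenizer`, and `inputs` objects from the snippet above:

```python
# Greedy decoding: deterministic, always picks the highest-probability next token
outputs = model.generate(inputs, max_new_tokens=5, do_sample=False)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
```

Alternatively, calling `torch.manual_seed(...)` before `generate` makes a sampling run repeatable on a fixed setup while keeping the sampled behavior.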