File size: 567 Bytes
20e924b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Single source of truth for the checkpoint location, so the tokenizer and
# model are guaranteed to come from the same repo/file.
MODEL_REPO = "lucasdozie/aos-qmodel-hermeai"
GGUF_FILE = "ggml-model-Q4_K_M.gguf"

# Load the tokenizer. Passing gguf_file makes transformers read the
# tokenizer metadata embedded in the GGUF checkpoint itself.
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO, gguf_file=GGUF_FILE)

# Load the quantized model.
# BUG FIX: the original called torch.jit.load() on the .gguf file. GGUF is a
# llama.cpp quantization format, not a TorchScript archive, so torch.jit.load
# raises — and a ScriptModule would have no .generate() method anyway.
# transformers can dequantize and load GGUF checkpoints directly via the
# gguf_file argument (requires the `gguf` extra: pip install gguf).
model = AutoModelForCausalLM.from_pretrained(MODEL_REPO, gguf_file=GGUF_FILE)

# Prepare input text as PyTorch tensors.
input_text = "Hello, how are you?"
inputs = tokenizer(input_text, return_tensors="pt")

# Run inference without tracking gradients (inference only — saves memory).
with torch.no_grad():
    outputs = model.generate(**inputs)

# Decode the generated token IDs back to text, dropping special tokens.
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)