aos-qmodel-hermeai / transformers_inference.py
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("lucasdozie/aos-qmodel-hermeai")

# Load the quantized model. A GGUF checkpoint is not a TorchScript archive,
# so torch.jit.load() cannot read it; recent transformers releases (>= 4.41,
# with the gguf package installed) can instead dequantize a GGUF file into
# standard torch weights via the gguf_file argument.
model = AutoModelForCausalLM.from_pretrained(
    "lucasdozie/aos-qmodel-hermeai",
    gguf_file="ggml-model-Q4_K_M.gguf",
)
# Prepare input text
input_text = "Hello, how are you?"
inputs = tokenizer(input_text, return_tensors="pt")
# Run inference (cap the number of newly generated tokens)
outputs = model.generate(**inputs, max_new_tokens=50)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)
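
Because the checkpoint is a Q4_K_M GGUF file, it can also be run natively, without dequantizing to torch weights, through llama-cpp-python. A minimal sketch, assuming the llama-cpp-python and huggingface-hub packages are installed and that ggml-model-Q4_K_M.gguf exists in the repo as shown above:

from llama_cpp import Llama

# Download the GGUF file from the Hub and load it in its quantized form
llm = Llama.from_pretrained(
    repo_id="lucasdozie/aos-qmodel-hermeai",
    filename="ggml-model-Q4_K_M.gguf",
)

out = llm("Hello, how are you?", max_tokens=50)
print(out["choices"][0]["text"])

The trade-off: the transformers path above yields a regular torch model (larger in memory, but compatible with the rest of the transformers API), while the llama.cpp path keeps the 4-bit quantization and is typically lighter for CPU inference.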