from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "lucasdozie/aos-qmodel-hermeai"
gguf_file = "ggml-model-Q4_K_M.gguf"

# Load the tokenizer and the quantized model directly from the GGUF file
# (requires transformers >= 4.41 with the `gguf` package installed;
# the Q4_K_M weights are dequantized to torch tensors on load)
tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=gguf_file)
# Prepare input text
input_text = "Hello, how are you?"
inputs = tokenizer(input_text, return_tensors="pt")
# Run inference (cap generation length explicitly)
outputs = model.generate(**inputs, max_new_tokens=50)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)
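
# Alternative sketch: run the GGUF file natively with llama-cpp-python
# instead of dequantizing it through transformers, which keeps the speed
# and memory benefits of the Q4_K_M quantization. This assumes
# llama-cpp-python is installed (pip install llama-cpp-python) and that
# the repo exposes the same GGUF filename as above.
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="lucasdozie/aos-qmodel-hermeai",
    filename="ggml-model-Q4_K_M.gguf",
)
output = llm("Hello, how are you?", max_tokens=50)
print(output["choices"][0]["text"])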