# pip install git+https://github.com/huggingface/transformers.git@main accelerate
"""Smoke-test a local LLaMA-style checkpoint: encode a short prompt,
generate a handful of new tokens, and print the decoded result.

Assumes the current directory ("./") contains both the tokenizer files
and the model weights (e.g. a downloaded Hugging Face checkpoint).
NOTE(review): the original file had its line breaks collapsed onto one
line starting with '#', which commented out the entire script; this
restores the intended statements verbatim.
"""
from transformers import LlamaTokenizer, AutoModelForCausalLM

tokenizer = LlamaTokenizer.from_pretrained("./")
model = AutoModelForCausalLM.from_pretrained("./")

# Tokenize the prompt; keep only the input ids (a single-sequence batch,
# so the attention mask is all ones and can be omitted).
inputs = tokenizer("A cat sat", return_tensors="pt")["input_ids"]

# Greedy generation by default; cap the continuation at 5 new tokens.
outputs = model.generate(inputs, max_new_tokens=5)

# outputs[0] is the full sequence (prompt + continuation) for the first batch item.
print(tokenizer.decode(outputs[0]))