from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizer
def load_model(
    base: str = "decapoda-research/llama-7b-hf",
    finetuned: str = "tloen/alpaca-lora-7b",
):
    """Load a LLaMA causal-LM with a PEFT adapter applied, plus its tokenizer.

    Args:
        base: Identifier or path handed to ``from_pretrained`` for both the
            tokenizer and the base model weights.
        finetuned: Identifier or path of the adapter handed to
            ``PeftModel.from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple, where ``model`` is the ``PeftModel``
        wrapping the base model and ``tokenizer`` is the configured
        ``LlamaTokenizer``.
    """
    tokenizer = LlamaTokenizer.from_pretrained(base)
    # Padding uses token id 0 and is applied on the left.
    # NOTE(review): presumably id 0 is picked so padding differs from EOS, and
    # left padding is for batched generation -- confirm against the callers.
    tokenizer.pad_token_id = 0
    tokenizer.padding_side = "left"

    # The base weights are requested in 8-bit, with automatic device placement.
    model = LlamaForCausalLM.from_pretrained(
        base,
        load_in_8bit=True,
        device_map="auto",
    )

    # NOTE(review): the adapter is given device_map={"": 0} while the base
    # model uses "auto"; confirm this is intended on multi-GPU hosts.
    model = PeftModel.from_pretrained(model, finetuned, device_map={"": 0})
    return model, tokenizer