### Loading Model and Tokenizer:

```python
import os
import pandas as pd
import torch
from datasets import load_dataset, Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
)
from peft import LoraConfig, PeftModel

# Hub identifiers: the base checkpoint and the LoRA adapter applied on top.
base_model_name = "NousResearch/Llama-2-7b-chat-hf"
finetuned_model = "dasanindya15/llama2-7b_qlora_Cladder_v1"

# Load the entire model on GPU 0.
device_map = {"": 0}

# Load the base model in FP16, attach the LoRA adapter, then merge the
# adapter weights into the base weights so the result is a plain
# (non-PEFT) causal-LM model.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, finetuned_model)
model = model.merge_and_unload()

# Load the tokenizer that belongs to the base model.
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)

# Reuse EOS as the padding token. Do NOT register a new '[PAD]' token here:
# that would enlarge the tokenizer vocabulary while the model's embedding
# matrix is never resized in this snippet (no `resize_token_embeddings`
# call), so the new token id could fall outside the embedding table.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
```

---
license: mit
datasets:
- dasanindya15/Cladder_v1
pipeline_tag: text-generation
---