from transformers import AutoModelForSeq2SeqLM
import torch
from datasets import Dataset
from torch.utils.data import DataLoader
from transformers import default_data_collator
from transformers import AutoTokenizer
from tqdm import tqdm
import pandas as pd
import numpy
import random
import nevergrad as ng
from peft.utils.save_and_load import set_peft_model_state_dict, get_peft_model_state_dict
from peft import PeftModel, PeftConfig
from functools import partial
random.seed(42)
numpy.random.seed(42)

def load_base_model_and_lora_modules(lora_module_list):
    # use gpu if available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # load basic model
    default_peft_model_id = lora_module_list[0]
    # find the base model
    model_name_or_path = PeftConfig.from_pretrained(default_peft_model_id).base_model_name_or_path
    base_model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path)
    # load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    # 0 is the default model
    peft_model = PeftModel.from_pretrained(base_model, default_peft_model_id)
    peft_model = peft_model.to(device)
    peft_model.eval()

    print("> Begin to load lora modules")
    cache = {}
    for peft_model_id in tqdm(lora_module_list):
        print("> Loading {} ...".format(peft_model_id))
        cur_peft_model = PeftModel.from_pretrained(base_model, peft_model_id)
        cache[peft_model_id] = get_peft_model_state_dict(cur_peft_model)

    return peft_model, tokenizer, cache
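
# Usage sketch (the hub ids below are illustrative placeholders, not verified module
# names): any PEFT LoRA adapters trained on the same base model can be listed.
# `cache` maps each module id to its LoRA state dict, so candidate weight vectors can
# later be evaluated without reloading anything from the hub.
#
#   modules = [
#       "lorahub/flan_t5_large-task_a",   # hypothetical module id
#       "lorahub/flan_t5_large-task_b",   # hypothetical module id
#   ]
#   peft_model, tokenizer, cache = load_base_model_and_lora_modules(modules)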

def preprocess_function(examples, tokenizer):
    inputs = examples["input"]
    targets = examples["output"]
    model_inputs = tokenizer(
        inputs,
        max_length=2048,
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    labels = tokenizer(
        targets,
        max_length=256,
        padding=True,
        truncation=True,
        return_tensors="pt",
    )
    labels = labels["input_ids"]
    labels[labels == tokenizer.pad_token_id] = -100
    model_inputs["labels"] = labels
    return model_inputs
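
# Note: -100 is the ignore_index used by the cross-entropy loss in Hugging Face
# seq2seq models, so padded label positions do not contribute to the few-shot loss
# computed in get_score below.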

def load_dataset_and_run(example_inputs, example_outputs, tokenizer):
    df = [
        {"input": example_inputs[i], "output": example_outputs[i]}
        for i in range(len(example_inputs))
    ]
    dataset = Dataset.from_pandas(pd.DataFrame(df))
    preprocess_func_with_tokenizer = partial(preprocess_function, tokenizer=tokenizer)
    processed_datasets = dataset.map(
        preprocess_func_with_tokenizer,
        batched=True,
        num_proc=1,
        desc="Running tokenizer on dataset",
    )
    return processed_datasets
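
# Example (hypothetical few-shot pairs): the returned dataset keeps the raw columns
# and adds the tokenized features consumed by the loss computation below.
#
#   ds = load_dataset_and_run(["1 + 1 =", "2 + 2 ="], ["2", "4"], tokenizer)
#   # ds.column_names -> ['input', 'output', 'input_ids', 'attention_mask', 'labels']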

def get_score(weights, model, cache, example_dataset):
    # the composed lora state dict
    final_state_dict = {}
    # list of candidate LoRA module ids
    lora_module_list = list(cache.keys())
    # all modules share the same state-dict keys
    keys = cache[lora_module_list[0]].keys()
    for i, peft_model_id in enumerate(lora_module_list):
        lora_state_dict = cache[peft_model_id]
        if i == 0:
            for key in keys:
                final_state_dict[key] = weights[i] * lora_state_dict[key]
        else:
            for key in keys:
                final_state_dict[key] = (
                    final_state_dict[key] + weights[i] * lora_state_dict[key]
                )
    # reload the model with the newly composed adapter weights
    set_peft_model_state_dict(model, final_state_dict)

    def get_loss():
        train_dataset = example_dataset
        train_dataloader = DataLoader(
            train_dataset,
            collate_fn=default_data_collator,
            batch_size=len(train_dataset),
            pin_memory=True,
        )
        train_loss = 0
        # use gpu if available
        device = "cuda" if torch.cuda.is_available() else "cpu"
        with torch.no_grad():
            for _, batch in enumerate(train_dataloader):
                batch = {k: v.to(device) for k, v in batch.items()}
                outputs = model(**batch)
                loss = outputs.loss
                train_loss += loss.detach().float()
        loss = train_loss.float()
        return float(loss) / len(train_dataset["input"])

    # minimize the metric: few-shot loss plus a small penalty on the weights
    loss = get_loss()
    # L1 regularization term
    l1_reg = sum([abs(x) for x in weights]) / len(weights)
    metric_val = loss + 0.05 * l1_reg
    return metric_val
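
# get_score is the black-box objective handed to nevergrad below: given a candidate
# weight vector, it composes the cached LoRA state dicts, loads the result into the
# model, and returns few-shot loss + 0.05 * mean(|w|). A manual sanity check on a
# two-module cache could look like (names assume the usage sketches above):
#
#   score = get_score([0.5, 0.5], model=peft_model, cache=cache, example_dataset=ds)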

def get_final_weights(weights, lora_module_list, cache):
    final_state_dict = {}
    keys = cache[lora_module_list[0]].keys()
    for i, peft_model_id in enumerate(lora_module_list):
        lora_state_dict = cache[peft_model_id]
        if i == 0:
            for key in keys:
                final_state_dict[key] = weights[i] * lora_state_dict[key]
        else:
            for key in keys:
                final_state_dict[key] = (
                    final_state_dict[key] + weights[i] * lora_state_dict[key]
                )
    return final_state_dict
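
# Sketch: once optimization is done, the composed state dict can be loaded back into
# the PeftModel and saved like any other adapter (the output path is a placeholder).
#
#   final_lora = get_final_weights(recommendation.value, lora_module_list, cache)
#   set_peft_model_state_dict(peft_model, final_lora)
#   peft_model.save_pretrained("./lorahub_merged_adapter")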

def lorahub_learning(lora_module_list, text_input, text_output, max_inference_step):
    number_of_loras = len(lora_module_list)
    if number_of_loras == 0:
        return None
    # load model
    model, tokenizer, cache = load_base_model_and_lora_modules(lora_module_list)
    # process dataset
    dataset = load_dataset_and_run(text_input.split("\n"), text_output.split("\n"), tokenizer)

    get_score_partial = partial(get_score, model=model, cache=cache,
                                example_dataset=dataset)
    # set up the limit of the weights
    instrum = ng.p.Array(
        init=[0] * number_of_loras,
        upper=[1.5] * number_of_loras,
        lower=[-1.5] * number_of_loras,
    )
    optimizer = ng.optimizers.NGOpt(parametrization=instrum, budget=max_inference_step)
    print("> Begin to perform gradient-free optimization ...")
    recommendation = optimizer.minimize(get_score_partial, verbosity=1)
    final_lora = get_final_weights(recommendation.value, lora_module_list, cache)
    return recommendation, final_lora
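
# Minimal end-to-end sketch, guarded so the module can still be imported by the app.
# The module ids and the single few-shot example are illustrative placeholders; any
# PEFT LoRA adapters sharing the same base model can be listed, and multiple examples
# can be supplied as newline-separated inputs/outputs.
if __name__ == "__main__":
    example_modules = [
        "lorahub/flan_t5_large-task_a",  # hypothetical module id
        "lorahub/flan_t5_large-task_b",  # hypothetical module id
    ]
    example_inputs = "Q: Today is 3/5/2023. What is the date tomorrow? A:"
    example_outputs = "3/6/2023"
    recommendation, final_lora = lorahub_learning(
        example_modules, example_inputs, example_outputs, max_inference_step=40
    )
    print("> Learned module weights:", recommendation.value)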