|
import sys

# Prefer the local PEFT source checkout over any pip-installed `peft` package
# (inserted at index 1 so it is searched before site-packages).
sys.path.insert(1, '/workspace/asr/peft/src')
|
|
|
|
|
|
|
|
|
|
|
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig |
|
from peft import PeftModel, PeftConfig |
|
import os |
|
import torch |
|
|
|
|
|
# QLoRA inference setup: load the base model in 4-bit NF4 quantization and
# attach the lazy-LoRA adapter weights saved in the current working directory.

# Where downloaded base-model weights / tokenizer files are cached.
cache_dir = "/workspace/asr/peft/qlora"

# The PEFT (lazy lora) checkpoint directory — assumes the script is launched
# from inside the adapter checkpoint directory.
lazylora_dir = os.getcwd()

# Read the adapter config to discover which base model it was trained against.
config = PeftConfig.from_pretrained(lazylora_dir)

tokenizer = AutoTokenizer.from_pretrained(
    config.base_model_name_or_path,
    cache_dir=cache_dir,
    # `use_auth_token` is deprecated in transformers; `token` is the
    # supported keyword for gated/private Hub repos.
    token=True,
)

# 4-bit NF4 weights with nested (double) quantization; matmuls computed
# in bfloat16 for speed/stability.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    quantization_config=bnb_config,
    device_map="auto",  # let accelerate place layers across available devices
    cache_dir=cache_dir,
    token=True,
)

# Total parameter count of the quantized base model.
print(sum(p.numel() for p in model.parameters()))

# Wrap the quantized base model with the trained lazy-LoRA adapters.
model = PeftModel.from_pretrained(model, lazylora_dir)
print('after adding lazy lora parameters:')
model.print_trainable_parameters()
|
|
|
|
|
|
|
|