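"""Manual/benchmark tests gated behind environment variables (BENCHMARK, FALCONS, LLAMA);
none of these run as part of the normal test suite."""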
import os

import pytest

from tests.utils import wrap_test_forked
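

# Sweeps the models below through src.gen.main to collect eval scores; only meaningful on a
# sufficiently large GPU system.  A usage sketch (assumed invocation, not from this file):
#   BENCHMARK=1 pytest -k test_score_eval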
@pytest.mark.skipif(not os.getenv('BENCHMARK'),
                    reason="Only valid on sufficiently large system and not normal part of testing."
                           " Instead used to get eval scores for all models.")
@pytest.mark.parametrize(
    "base_model",
    [
        "h2oai/h2ogpt-oasst1-falcon-40b",
        "h2oai/h2ogpt-oig-oasst1-512-6_9b",
        "h2oai/h2ogpt-oig-oasst1-512-12b",
        "h2oai/h2ogpt-oig-oasst1-512-20b",
        "h2oai/h2ogpt-oasst1-512-12b",
        "h2oai/h2ogpt-oasst1-512-20b",
        "h2oai/h2ogpt-gm-oasst1-en-1024-20b",
        "databricks/dolly-v2-12b",
        "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2",
        "ehartford/WizardLM-7B-Uncensored",
        "ehartford/WizardLM-13B-Uncensored",
        "AlekseyKorshuk/vicuna-7b",
        "TheBloke/stable-vicuna-13B-HF",
        "decapoda-research/llama-7b-hf",
        "decapoda-research/llama-13b-hf",
        "decapoda-research/llama-30b-hf",
        "junelee/wizard-vicuna-13b",
        "openaccess-ai-collective/wizard-mega-13b",
    ]
)
@wrap_test_forked
def test_score_eval(base_model):
    from src.gen import main
    main(
        base_model=base_model,
        chat=False,
        stream_output=False,
        gradio=False,
        eval_prompts_only_num=500,
        eval_as_output=False,
        num_beams=2,
        use_gpu_id=False,
    )
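

# Pre-downloads the Falcon-based checkpoints below into the local Hugging Face cache
# ("download purpose").  A usage sketch (assumed invocation): FALCONS=1 pytest -k test_get_falcons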
@pytest.mark.skipif(not os.getenv('FALCONS'), reason="download purpose")
@pytest.mark.parametrize(
    "base_model",
    [
        "OpenAssistant/falcon-7b-sft-top1-696",
        "OpenAssistant/falcon-7b-sft-mix-2000",
        "h2oai/h2ogpt-oasst1-falcon-40b",
        "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1",
        "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2",
        "h2oai/h2ogpt-gm-oasst1-multilang-2048-falcon-7b",
        "OpenAssistant/falcon-40b-sft-top1-560",
        "OpenAssistant/falcon-40b-sft-mix-1226",
    ]
)
@wrap_test_forked
def test_get_falcons(base_model):
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    t = AutoTokenizer.from_pretrained(base_model,
                                      use_fast=False,
                                      padding_side="left",
                                      trust_remote_code=True,
                                      token=True,
                                      )
    assert t is not None
    m = AutoModelForCausalLM.from_pretrained(base_model,
                                             trust_remote_code=True,
                                             torch_dtype=torch.float16,
                                             token=True,
                                             )
    assert m is not None
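

# Loads the epfml/landmark-attention-llama7b-wdiff weight-diff checkpoint, then shells out to a
# recipe that converts raw LLaMA weights to HF format and recovers the tuned landmark model.
# A usage sketch (assumed invocation): LLAMA=1 pytest -k test_get_landmark_llama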
@pytest.mark.skipif(not os.getenv('LLAMA'), reason="LLaMa conversion")
@wrap_test_forked
def test_get_landmark_llama():
    from transformers import LlamaForCausalLM, LlamaTokenizer
    m = LlamaForCausalLM.from_pretrained("epfml/landmark-attention-llama7b-wdiff")
    t = LlamaTokenizer.from_pretrained("epfml/landmark-attention-llama7b-wdiff")
    assert m is not None and t is not None

    os.system("""
#
# step 1, convert llama to HF format
pip install protobuf==3.19.0
source ~/.bashrc.mamba
mamba create -n trans
conda activate trans
conda install python=3.10 -y

git clone https://github.com/epfml/landmark-attention.git
pip install fire datasets
git clone https://github.com/huggingface/transformers.git
cd transformers
pip install .
pip install torch accelerate sentencepiece protobuf==3.19.0
# below requires LLaMa weights
python src/transformers/models/llama/convert_llama_weights_to_hf.py --input_dir /data/jon/LLaMA --model_size 7B --output_dir llama_7B
#
# step 2, make landmark model (change hash if updated)
mkdir -p epfml/landmark-attention-llama7b-wdiff
cd epfml/landmark-attention-llama7b-wdiff
ln -s ~/.cache/huggingface/hub/models--epfml--landmark-attention-llama7b-wdiff/snapshots/050562871ac72723b4ab674f0392b02cd9609842/* .
cd ../../
python ../landmark-attention/llama/weight_diff.py recover --path_raw llama_7B --path_diff epfml/landmark-attention-llama7b-wdiff --path_tuned landmark_llama_7b
""")
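
    # Possible follow-up (an assumption, not part of the original recipe): once the recovery step
    # above has written ./landmark_llama_7b, the tuned model could presumably be loaded with:
    #   from transformers import LlamaForCausalLM, LlamaTokenizer
    #   m2 = LlamaForCausalLM.from_pretrained("landmark_llama_7b")
    #   t2 = LlamaTokenizer.from_pretrained("landmark_llama_7b")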
|