#!/bin/bash

eval "$(conda shell.bash hook)"
conda activate llama_factory
|
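# Model, dataset, and fine-tuning configuration.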
MODEL_NAME=LMCocktail-10.7B-v1
STAGE=sft
EPOCH=1
DATA=glaive-function-calling-v2

FT_TYPE=lora
LoRA_TARGET=q_proj,v_proj
TEMPLATE=solar
PREDICTION_SAMPLES=20
|
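# Abort early if the base model is not available locally.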
MODEL_PATH=./models/$MODEL_NAME
if [ ! -d "$MODEL_PATH" ]; then
    echo "Model not found: $MODEL_PATH"
    exit 1
fi
|
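# All training artifacts (adapter weights, logs, predictions) are written here.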
SAVE_PATH=./models/$STAGE/$MODEL_NAME-$STAGE-$DATA-ep$EPOCH-$FT_TYPE
if [ ! -d "$SAVE_PATH" ]; then
    mkdir -p "$SAVE_PATH"
fi
|
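# Pipeline stages are opt-in via command-line flags: --train, --pred, --exp.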
DO_TRAIN=false
DO_PREDICT=false
DO_EXPORT=false

for arg in "$@"; do
    if [[ "$arg" == "--train" ]]; then
        echo "'--train' flag found: training enabled"
        DO_TRAIN=true
    fi
    if [[ "$arg" == "--pred" ]]; then
        echo "'--pred' flag found: prediction enabled"
        DO_PREDICT=true
    fi
    if [[ "$arg" == "--exp" ]]; then
        echo "'--exp' flag found: export enabled"
        DO_EXPORT=true
    fi
done
|
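# Stage 1: LoRA fine-tuning of the 4-bit-quantized base model, with periodic evaluation on a held-out split.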
if [[ "$DO_TRAIN" == true ]]; then
    accelerate launch src/train_bash.py \
        --seed 42 \
        --stage $STAGE \
        --model_name_or_path $MODEL_PATH \
        --dataset $DATA \
        --val_size .1 \
        --template $TEMPLATE \
        --finetuning_type $FT_TYPE \
        --do_train \
        --lora_target $LoRA_TARGET \
        --output_dir $SAVE_PATH \
        --overwrite_output_dir \
        --overwrite_cache \
        --per_device_train_batch_size 1 \
        --gradient_accumulation_steps 4 \
        --lr_scheduler_type cosine \
        --logging_steps 500 \
        --save_steps 500 \
        --learning_rate 5e-5 \
        --num_train_epochs $EPOCH \
        --do_eval \
        --evaluation_strategy steps \
        --per_device_eval_batch_size 1 \
        --prediction_loss_only \
        --plot_loss \
        --quantization_bit 4 \
        --report_to tensorboard \
        |& tee $SAVE_PATH/train_eval_log.txt
fi
|
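# Stage 2: generate predictions for up to PREDICTION_SAMPLES dataset examples with the trained adapter.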
if [[ "$DO_PREDICT" == true ]]; then
    SAVE_PATH_PREDICT=$SAVE_PATH/Predict_$PREDICTION_SAMPLES
    if [ ! -d "$SAVE_PATH_PREDICT" ]; then
        mkdir -p "$SAVE_PATH_PREDICT"
    fi
    CUDA_VISIBLE_DEVICES=0 python src/train_bash.py \
        --stage $STAGE \
        --model_name_or_path $MODEL_PATH \
        --do_predict \
        --max_samples $PREDICTION_SAMPLES \
        --predict_with_generate \
        --dataset $DATA \
        --template $TEMPLATE \
        --finetuning_type $FT_TYPE \
        --adapter_name_or_path $SAVE_PATH \
        --output_dir $SAVE_PATH_PREDICT \
        --per_device_eval_batch_size 1 \
        |& tee $SAVE_PATH_PREDICT/predict_log.txt
fi
|
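# Stage 3: merge the fine-tuned LoRA adapter into the base model and export the result.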
if [[ "$DO_EXPORT" == true ]]; then
    EXPORT_PATH=./models/export/$MODEL_NAME-$STAGE-$DATA-ep$EPOCH
    if [ ! -d "$EXPORT_PATH" ]; then
        mkdir -p "$EXPORT_PATH"
    fi
    CUDA_VISIBLE_DEVICES=0 python src/export_model.py \
        --model_name_or_path $MODEL_PATH \
        --adapter_name_or_path $SAVE_PATH \
        --template $TEMPLATE \
        --finetuning_type $FT_TYPE \
        --export_dir $EXPORT_PATH \
        --export_size 5 \
        |& tee $EXPORT_PATH/export_log.txt
fi