# File size: 1,385 Bytes
# 51c57f8
# ---------------------------------------------------------------------------
# Run configuration. Everything below is a plain global assignment; the
# fine-tune function reads these values when it builds the run.py command.
# ---------------------------------------------------------------------------

# Language to process: first positional argument of the script. fine-tune uses
# it for dataset/<lang>/..., ./saved_models/fine_tune/<lang> and --lang.
lang="$1"

# Start-up timestamp (YYYYmmddHHMMSS). Nothing visible in this script reads it.
current_time="$(date '+%Y%m%d%H%M%S')"

# Model selection.
# base_model is passed as --config_name, --model_name_or_path and
# --tokenizer_name; model_type is passed as --model_type.
base_model="DeepSoftwareAnalytics/CoCoSoDa"
model_type="multi-loss-cocosoda"  # original note lists: "base", "cocosoda"

# Values forwarded as --moco_k / --moco_m / --moco_t.
moco_k="1024"
moco_m="0.999"
moco_t="0.07"

# Values forwarded as --code_length / --nl_length.
code_length="64"
nl_length="64"

# Values forwarded as --train_batch_size / --learning_rate / --num_train_epochs.
batch_size="64"
lr="2e-5"
epoch="5"

# data_aug_type is forwarded as --data_aug_type.
data_aug_type="random_mask"

# Assigned but not referenced by anything else visible in this script.
aug_type_way="random_replace_type"
max_steps="1000"
save_steps="100"

# Assigned without `export`; fine-tune hands it to python explicitly as a
# per-command environment assignment.
CUDA_VISIBLE_DEVICES="0,1"

# Disabled debugging leftovers:
# echo ${base_model}
# exit 111
#######################################
# Run `python run.py` with --do_train and --do_test for the language in $lang,
# writing results under ./saved_models/fine_tune/<lang>.
# Globals:
#   lang, model_type, base_model, data_aug_type, moco_k, moco_m, moco_t,
#   epoch, code_length, nl_length, batch_size, lr, CUDA_VISIBLE_DEVICES (read)
#   output_dir (written; deliberately left global, as it always was)
# Arguments:
#   None
# Outputs:
#   Prints output_dir, then run.py's combined stdout/stderr, to stdout; the
#   run.py output is also saved to <output_dir>/running.log.
# Returns:
#   2 if lang is empty, mkdir's status if the output directory cannot be
#   created, otherwise the exit status of python (not of tee).
#######################################
function fine-tune () {
  # An empty lang would yield dataset//train.jsonl and write into the shared
  # ./saved_models/fine_tune/ directory, so refuse to start instead.
  if [[ -z "${lang:-}" ]]; then
    printf '%s\n' "fine-tune: lang is empty; pass the language as the first script argument" >&2
    return 2
  fi

  output_dir="./saved_models/fine_tune/${lang}"
  mkdir -p -- "${output_dir}" || return
  printf '%s\n' "${output_dir}"

  # Build the command line as an array so every value stays a single argument
  # even if it contains whitespace or glob characters.
  local -a run_args=(
    --eval_frequency 100
    --moco_m "${moco_m}" --moco_t "${moco_t}"
    --model_type "${model_type}"
    --output_dir "${output_dir}"
    --data_aug_type "${data_aug_type}"
    --moco_k "${moco_k}"
    --config_name="${base_model}"
    --model_name_or_path="${base_model}"
    --tokenizer_name="${base_model}"
    --lang="${lang}"
    --do_train
    --do_test
    --train_data_file="dataset/${lang}/train.jsonl"
    --eval_data_file="dataset/${lang}/valid.jsonl"
    --test_data_file="dataset/${lang}/test.jsonl"
    --codebase_file="dataset/${lang}/codebase.jsonl"
    --num_train_epochs "${epoch}"
    --code_length "${code_length}"
    --nl_length "${nl_length}"
    --train_batch_size "${batch_size}"
    --eval_batch_size 64
    --learning_rate "${lr}"
    --seed 123456
  )

  CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES}" python run.py "${run_args[@]}" 2>&1 \
    | tee "${output_dir}/running.log"
  # Report python's status; the pipeline's own status would be tee's, which
  # hides a failed run.
  return "${PIPESTATUS[0]}"
}
# Start the run using the settings assigned earlier in this script.
fine-tune
# |