#!/bin/bash
# Fine-tune CoCoSoDa for code search on one language (first positional argument).
lang=$1
current_time=$(date "+%Y%m%d%H%M%S")
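
# Optional guard (not in the original script): exit early if no language argument was given.
if [ -z "${lang}" ]; then
    echo "Usage: bash $0 <lang>   (e.g. python, java, ruby)" >&2
    exit 1
fi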

code_length=64
nl_length=64

model_type=multi-loss-cocosoda # alternatives: "base", "cocosoda"
moco_k=1024
moco_m=0.999
lr=2e-5
moco_t=0.07

batch_size=64
max_steps=1000
save_steps=100
aug_type_way=random_replace_type
data_aug_type=random_mask

base_model=DeepSoftwareAnalytics/CoCoSoDa
epoch=5
CUDA_VISIBLE_DEVICES="0,1"   # GPUs made visible to the training run


# Fine-tune on ${lang} and run evaluation; checkpoints and the run log go to ${output_dir}.
function fine-tune () {
    output_dir=./saved_models/fine_tune/${lang}
    mkdir -p ${output_dir}
    echo ${output_dir}
    CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES} python run.py --eval_frequency 100 \
    --moco_m ${moco_m} --moco_t ${moco_t}  \
    --model_type ${model_type} \
    --output_dir ${output_dir}  \
    --data_aug_type ${data_aug_type} \
    --moco_k ${moco_k} \
    --config_name=${base_model}  \
    --model_name_or_path=${base_model} \
    --tokenizer_name=${base_model} \
    --lang=$lang \
    --do_train \
    --do_test \
    --train_data_file=dataset/$lang/train.jsonl \
    --eval_data_file=dataset/$lang/valid.jsonl  \
    --test_data_file=dataset/$lang/test.jsonl \
    --codebase_file=dataset/$lang/codebase.jsonl \
    --num_train_epochs ${epoch} \
    --code_length ${code_length} \
    --nl_length ${nl_length} \
    --train_batch_size ${batch_size} \
    --eval_batch_size 64 \
    --learning_rate ${lr} \
    --seed 123456 2>&1 | tee ${output_dir}/running.log
}

fine-tune
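
# Example invocation (the script filename below is illustrative; it assumes
# dataset/<lang>/{train,valid,test,codebase}.jsonl exist):
#   bash run_fine_tune.sh python
# Results and running.log are written under saved_models/fine_tune/<lang>/.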