File size: 31,368 Bytes
d5fa9ad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 |
02/17/2024 13:45:37 - INFO - __main__ - device: cuda, n_gpu: 1
02/17/2024 13:45:41 - INFO - __main__ - +------------------------------------------------------------+--------------+----------+
| Layer Name | Output Shape | Param # |
+------------------------------------------------------------+--------------+----------+
| encoder.embeddings.word_embeddings.weight | [51451, 768] | 39514368 |
| encoder.embeddings.position_embeddings.weight | [1026, 768] | 787968 |
| encoder.embeddings.token_type_embeddings.weight | [10, 768] | 7680 |
| encoder.embeddings.LayerNorm.weight | [768] | 768 |
| encoder.embeddings.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.0.attention.self.query.weight | [768, 768] | 589824 |
| encoder.encoder.layer.0.attention.self.query.bias | [768] | 768 |
| encoder.encoder.layer.0.attention.self.key.weight | [768, 768] | 589824 |
| encoder.encoder.layer.0.attention.self.key.bias | [768] | 768 |
| encoder.encoder.layer.0.attention.self.value.weight | [768, 768] | 589824 |
| encoder.encoder.layer.0.attention.self.value.bias | [768] | 768 |
| encoder.encoder.layer.0.attention.output.dense.weight | [768, 768] | 589824 |
| encoder.encoder.layer.0.attention.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.0.attention.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.0.attention.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.0.intermediate.dense.weight | [3072, 768] | 2359296 |
| encoder.encoder.layer.0.intermediate.dense.bias | [3072] | 3072 |
| encoder.encoder.layer.0.output.dense.weight | [768, 3072] | 2359296 |
| encoder.encoder.layer.0.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.0.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.0.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.1.attention.self.query.weight | [768, 768] | 589824 |
| encoder.encoder.layer.1.attention.self.query.bias | [768] | 768 |
| encoder.encoder.layer.1.attention.self.key.weight | [768, 768] | 589824 |
| encoder.encoder.layer.1.attention.self.key.bias | [768] | 768 |
| encoder.encoder.layer.1.attention.self.value.weight | [768, 768] | 589824 |
| encoder.encoder.layer.1.attention.self.value.bias | [768] | 768 |
| encoder.encoder.layer.1.attention.output.dense.weight | [768, 768] | 589824 |
| encoder.encoder.layer.1.attention.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.1.attention.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.1.attention.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.1.intermediate.dense.weight | [3072, 768] | 2359296 |
| encoder.encoder.layer.1.intermediate.dense.bias | [3072] | 3072 |
| encoder.encoder.layer.1.output.dense.weight | [768, 3072] | 2359296 |
| encoder.encoder.layer.1.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.1.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.1.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.2.attention.self.query.weight | [768, 768] | 589824 |
| encoder.encoder.layer.2.attention.self.query.bias | [768] | 768 |
| encoder.encoder.layer.2.attention.self.key.weight | [768, 768] | 589824 |
| encoder.encoder.layer.2.attention.self.key.bias | [768] | 768 |
| encoder.encoder.layer.2.attention.self.value.weight | [768, 768] | 589824 |
| encoder.encoder.layer.2.attention.self.value.bias | [768] | 768 |
| encoder.encoder.layer.2.attention.output.dense.weight | [768, 768] | 589824 |
| encoder.encoder.layer.2.attention.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.2.attention.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.2.attention.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.2.intermediate.dense.weight | [3072, 768] | 2359296 |
| encoder.encoder.layer.2.intermediate.dense.bias | [3072] | 3072 |
| encoder.encoder.layer.2.output.dense.weight | [768, 3072] | 2359296 |
| encoder.encoder.layer.2.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.2.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.2.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.3.attention.self.query.weight | [768, 768] | 589824 |
| encoder.encoder.layer.3.attention.self.query.bias | [768] | 768 |
| encoder.encoder.layer.3.attention.self.key.weight | [768, 768] | 589824 |
| encoder.encoder.layer.3.attention.self.key.bias | [768] | 768 |
| encoder.encoder.layer.3.attention.self.value.weight | [768, 768] | 589824 |
| encoder.encoder.layer.3.attention.self.value.bias | [768] | 768 |
| encoder.encoder.layer.3.attention.output.dense.weight | [768, 768] | 589824 |
| encoder.encoder.layer.3.attention.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.3.attention.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.3.attention.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.3.intermediate.dense.weight | [3072, 768] | 2359296 |
| encoder.encoder.layer.3.intermediate.dense.bias | [3072] | 3072 |
| encoder.encoder.layer.3.output.dense.weight | [768, 3072] | 2359296 |
| encoder.encoder.layer.3.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.3.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.3.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.4.attention.self.query.weight | [768, 768] | 589824 |
| encoder.encoder.layer.4.attention.self.query.bias | [768] | 768 |
| encoder.encoder.layer.4.attention.self.key.weight | [768, 768] | 589824 |
| encoder.encoder.layer.4.attention.self.key.bias | [768] | 768 |
| encoder.encoder.layer.4.attention.self.value.weight | [768, 768] | 589824 |
| encoder.encoder.layer.4.attention.self.value.bias | [768] | 768 |
| encoder.encoder.layer.4.attention.output.dense.weight | [768, 768] | 589824 |
| encoder.encoder.layer.4.attention.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.4.attention.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.4.attention.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.4.intermediate.dense.weight | [3072, 768] | 2359296 |
| encoder.encoder.layer.4.intermediate.dense.bias | [3072] | 3072 |
| encoder.encoder.layer.4.output.dense.weight | [768, 3072] | 2359296 |
| encoder.encoder.layer.4.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.4.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.4.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.5.attention.self.query.weight | [768, 768] | 589824 |
| encoder.encoder.layer.5.attention.self.query.bias | [768] | 768 |
| encoder.encoder.layer.5.attention.self.key.weight | [768, 768] | 589824 |
| encoder.encoder.layer.5.attention.self.key.bias | [768] | 768 |
| encoder.encoder.layer.5.attention.self.value.weight | [768, 768] | 589824 |
| encoder.encoder.layer.5.attention.self.value.bias | [768] | 768 |
| encoder.encoder.layer.5.attention.output.dense.weight | [768, 768] | 589824 |
| encoder.encoder.layer.5.attention.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.5.attention.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.5.attention.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.5.intermediate.dense.weight | [3072, 768] | 2359296 |
| encoder.encoder.layer.5.intermediate.dense.bias | [3072] | 3072 |
| encoder.encoder.layer.5.output.dense.weight | [768, 3072] | 2359296 |
| encoder.encoder.layer.5.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.5.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.5.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.6.attention.self.query.weight | [768, 768] | 589824 |
| encoder.encoder.layer.6.attention.self.query.bias | [768] | 768 |
| encoder.encoder.layer.6.attention.self.key.weight | [768, 768] | 589824 |
| encoder.encoder.layer.6.attention.self.key.bias | [768] | 768 |
| encoder.encoder.layer.6.attention.self.value.weight | [768, 768] | 589824 |
| encoder.encoder.layer.6.attention.self.value.bias | [768] | 768 |
| encoder.encoder.layer.6.attention.output.dense.weight | [768, 768] | 589824 |
| encoder.encoder.layer.6.attention.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.6.attention.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.6.attention.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.6.intermediate.dense.weight | [3072, 768] | 2359296 |
| encoder.encoder.layer.6.intermediate.dense.bias | [3072] | 3072 |
| encoder.encoder.layer.6.output.dense.weight | [768, 3072] | 2359296 |
| encoder.encoder.layer.6.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.6.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.6.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.7.attention.self.query.weight | [768, 768] | 589824 |
| encoder.encoder.layer.7.attention.self.query.bias | [768] | 768 |
| encoder.encoder.layer.7.attention.self.key.weight | [768, 768] | 589824 |
| encoder.encoder.layer.7.attention.self.key.bias | [768] | 768 |
| encoder.encoder.layer.7.attention.self.value.weight | [768, 768] | 589824 |
| encoder.encoder.layer.7.attention.self.value.bias | [768] | 768 |
| encoder.encoder.layer.7.attention.output.dense.weight | [768, 768] | 589824 |
| encoder.encoder.layer.7.attention.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.7.attention.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.7.attention.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.7.intermediate.dense.weight | [3072, 768] | 2359296 |
| encoder.encoder.layer.7.intermediate.dense.bias | [3072] | 3072 |
| encoder.encoder.layer.7.output.dense.weight | [768, 3072] | 2359296 |
| encoder.encoder.layer.7.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.7.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.7.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.8.attention.self.query.weight | [768, 768] | 589824 |
| encoder.encoder.layer.8.attention.self.query.bias | [768] | 768 |
| encoder.encoder.layer.8.attention.self.key.weight | [768, 768] | 589824 |
| encoder.encoder.layer.8.attention.self.key.bias | [768] | 768 |
| encoder.encoder.layer.8.attention.self.value.weight | [768, 768] | 589824 |
| encoder.encoder.layer.8.attention.self.value.bias | [768] | 768 |
| encoder.encoder.layer.8.attention.output.dense.weight | [768, 768] | 589824 |
| encoder.encoder.layer.8.attention.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.8.attention.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.8.attention.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.8.intermediate.dense.weight | [3072, 768] | 2359296 |
| encoder.encoder.layer.8.intermediate.dense.bias | [3072] | 3072 |
| encoder.encoder.layer.8.output.dense.weight | [768, 3072] | 2359296 |
| encoder.encoder.layer.8.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.8.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.8.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.9.attention.self.query.weight | [768, 768] | 589824 |
| encoder.encoder.layer.9.attention.self.query.bias | [768] | 768 |
| encoder.encoder.layer.9.attention.self.key.weight | [768, 768] | 589824 |
| encoder.encoder.layer.9.attention.self.key.bias | [768] | 768 |
| encoder.encoder.layer.9.attention.self.value.weight | [768, 768] | 589824 |
| encoder.encoder.layer.9.attention.self.value.bias | [768] | 768 |
| encoder.encoder.layer.9.attention.output.dense.weight | [768, 768] | 589824 |
| encoder.encoder.layer.9.attention.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.9.attention.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.9.attention.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.9.intermediate.dense.weight | [3072, 768] | 2359296 |
| encoder.encoder.layer.9.intermediate.dense.bias | [3072] | 3072 |
| encoder.encoder.layer.9.output.dense.weight | [768, 3072] | 2359296 |
| encoder.encoder.layer.9.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.9.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.9.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.10.attention.self.query.weight | [768, 768] | 589824 |
| encoder.encoder.layer.10.attention.self.query.bias | [768] | 768 |
| encoder.encoder.layer.10.attention.self.key.weight | [768, 768] | 589824 |
| encoder.encoder.layer.10.attention.self.key.bias | [768] | 768 |
| encoder.encoder.layer.10.attention.self.value.weight | [768, 768] | 589824 |
| encoder.encoder.layer.10.attention.self.value.bias | [768] | 768 |
| encoder.encoder.layer.10.attention.output.dense.weight | [768, 768] | 589824 |
| encoder.encoder.layer.10.attention.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.10.attention.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.10.attention.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.10.intermediate.dense.weight | [3072, 768] | 2359296 |
| encoder.encoder.layer.10.intermediate.dense.bias | [3072] | 3072 |
| encoder.encoder.layer.10.output.dense.weight | [768, 3072] | 2359296 |
| encoder.encoder.layer.10.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.10.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.10.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.11.attention.self.query.weight | [768, 768] | 589824 |
| encoder.encoder.layer.11.attention.self.query.bias | [768] | 768 |
| encoder.encoder.layer.11.attention.self.key.weight | [768, 768] | 589824 |
| encoder.encoder.layer.11.attention.self.key.bias | [768] | 768 |
| encoder.encoder.layer.11.attention.self.value.weight | [768, 768] | 589824 |
| encoder.encoder.layer.11.attention.self.value.bias | [768] | 768 |
| encoder.encoder.layer.11.attention.output.dense.weight | [768, 768] | 589824 |
| encoder.encoder.layer.11.attention.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.11.attention.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.11.attention.output.LayerNorm.bias | [768] | 768 |
| encoder.encoder.layer.11.intermediate.dense.weight | [3072, 768] | 2359296 |
| encoder.encoder.layer.11.intermediate.dense.bias | [3072] | 3072 |
| encoder.encoder.layer.11.output.dense.weight | [768, 3072] | 2359296 |
| encoder.encoder.layer.11.output.dense.bias | [768] | 768 |
| encoder.encoder.layer.11.output.LayerNorm.weight | [768] | 768 |
| encoder.encoder.layer.11.output.LayerNorm.bias | [768] | 768 |
| encoder.pooler.dense.weight | [768, 768] | 589824 |
| encoder.pooler.dense.bias | [768] | 768 |
+------------------------------------------------------------+--------------+----------+
02/17/2024 13:45:41 - INFO - __main__ - Training/evaluation parameters Namespace(agg_way='avg', aug_type_way='random_replace_type', code_length=256, codebase_file='dataset/java/codebase.jsonl', config_name='DeepSoftwareAnalytics/CoCoSoDa', couninue_pre_train_data_files=['dataset/ruby/train.jsonl', 'dataset/java/train.jsonl'], data_aug_type='random_mask', data_flow_length=0, debug=False, device=device(type='cuda'), do_avg=False, do_continue_pre_trained=False, do_eval=False, do_fine_tune=False, do_ineer_loss=False, do_multi_lang_continue_pre_train=False, do_single_lang_continue_pre_train=False, do_test=True, do_train=True, do_whitening=False, do_zero_short=False, epoch=50, eval_batch_size=64, eval_data_file='dataset/java/valid.jsonl', eval_frequency=100, fp16=False, gradient_accumulation_steps=1, hidden_size=768, lang='java', learning_rate=2e-05, loaded_codebert_model_filename=None, loaded_model_filename=None, local_rank=-1, logging_steps=50, max_codeblock_num=10, max_grad_norm=1.0, max_steps=100, mlm_probability=0.1, mlp=False, moco_dim=768, moco_k=1024, moco_m=0.999, moco_t=0.07, moco_type='encoder_queue', model_name_or_path='DeepSoftwareAnalytics/CoCoSoDa', model_type='base', n_debug_samples=100, n_gpu=1, nl_length=128, num_train_epochs=5, num_warmup_steps=0, only_save_the_nl_code_vec=False, output_dir='./saved_models/fine_tune/java', print_align_unif_loss=False, save_evaluation_reuslt=False, save_evaluation_reuslt_dir=None, save_steps=50, seed=123456, test_data_file='dataset/java/test.jsonl', time_score=1, tokenizer_name='DeepSoftwareAnalytics/CoCoSoDa', train_batch_size=128, train_data_file='dataset/java/train.jsonl', use_best_mrr_model=False, weight_decay=0.01)
02/17/2024 13:48:46 - INFO - __main__ - *** Example ***
02/17/2024 13:48:46 - INFO - __main__ - idx: 0
02/17/2024 13:48:46 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', '@', '_Override', '_public', '_Image', 'Source', '_apply', '_(', '_Image', 'Source', '_input', '_)', '_{', '_final', '_int', '_[', '_]', '_[', '_]', '_pixel', 'Matrix', '_=', '_new', '_int', '_[', '_3', '_]', '_[', '_3', '_]', '_;', '_int', '_w', '_=', '_input', '_.', '_getWidth', '_(', '_)', '_;', '_int', '_h', '_=', '_input', '_.', '_getHeight', '_(', '_)', '_;', '_int', '_[', '_]', '_[', '_]', '_output', '_=', '_new', '_int', '_[', '_h', '_]', '_[', '_w', '_]', '_;', '_for', '_(', '_int', '_j', '_=', '_1', '_;', '_j', '_<', '_h', '_-', '_1', '_;', '_j', '_++', '_)', '_{', '_for', '_(', '_int', '_i', '_=', '_1', '_;', '_i', '_<', '_w', '_-', '_1', '_;', '_i', '_++', '_)', '_{', '_pixel', 'Matrix', '_[', '_0', '_]', '_[', '_0', '_]', '_=', '_input', '_.', '_get', 'R', '_(', '_i', '_-', '_1', '_,', '_j', '_-', '_1', '_)', '_;', '_pixel', 'Matrix', '_[', '_0', '_]', '_[', '_1', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_-', '_1', '_,', '_j', '_)', '_;', '_pixel', 'Matrix', '_[', '_0', '_]', '_[', '_2', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_-', '_1', '_,', '_j', '_+', '_1', '_)', '_;', '_pixel', 'Matrix', '_[', '_1', '_]', '_[', '_0', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_,', '_j', '_-', '_1', '_)', '_;', '_pixel', 'Matrix', '_[', '_1', '_]', '_[', '_2', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_,', '_j', '_+', '_1', '_)', '_;', '_pixel', 'Matrix', '_[', '_2', '_]', '_[', '_0', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_+', '_1', '_,', '_j', '_-', '_1', '_)', '_;', '_pixel', 'Matrix', '_[', '_2', '_]', '_[', '_1', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_+', '_1', '_,', '_j', '_)', '_;', '_pixel', '</s>']
02/17/2024 13:48:46 - INFO - __main__ - code_ids: 0 6 2 150 19505 1240 6085 1768 5230 400 6085 1768 1586 743 399 1920 554 626 2406 626 2406 5578 3679 385 579 554 626 995 2406 626 995 2406 2476 554 477 385 1586 746 32671 400 743 2476 554 566 385 1586 746 32720 400 743 2476 554 626 2406 626 2406 1721 385 579 554 626 566 2406 626 477 2406 2476 563 400 554 913 385 524 2476 913 517 566 581 524 2476 913 1932 743 399 563 400 554 548 385 524 2476 548 517 477 581 524 2476 548 1932 743 399 5578 3679 626 461 2406 626 461 2406 385 1586 746 744 168 400 548 581 524 2019 913 581 524 743 2476 5578 3679 626 461 2406 626 524 2406 385 1586 746 744 7664 400 548 581 524 2019 913 743 2476 5578 3679 626 461 2406 626 688 2406 385 1586 746 744 7664 400 548 581 524 2019 913 513 524 743 2476 5578 3679 626 524 2406 626 461 2406 385 1586 746 744 7664 400 548 2019 913 581 524 743 2476 5578 3679 626 524 2406 626 688 2406 385 1586 746 744 7664 400 548 2019 913 513 524 743 2476 5578 3679 626 688 2406 626 461 2406 385 1586 746 744 7664 400 548 513 524 2019 913 581 524 743 2476 5578 3679 626 688 2406 626 524 2406 385 1586 746 744 7664 400 548 513 524 2019 913 743 2476 5578 2
02/17/2024 13:48:46 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Expect', 's', '_a', '_height', '_mat', '_as', '_input', '</s>']
02/17/2024 13:48:46 - INFO - __main__ - nl_ids: 0 6 2 7871 201 434 3082 5772 880 1586 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
02/17/2024 13:48:46 - INFO - __main__ - *** Example ***
02/17/2024 13:48:46 - INFO - __main__ - idx: 1
02/17/2024 13:48:46 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'public', '_<', '_L', 'extends', 'Listener', '_>', '_void', '_pop', 'Event', '_(', '_Event', '_<', '_?', '_,', '_L', '_>', '_expected', '_)', '_{', '_synchronized', '_(', '_this', '_.', '_stack', '_)', '_{', '_final', '_Event', '_<', '_?', '_,', '_?', '_>', '_actual', '_=', '_this', '_.', '_stack', '_.', '_pop', '_(', '_)', '_;', '_if', '_(', '_actual', '_!=', '_expected', '_)', '_{', '_throw', '_new', '_IllegalStateException', '_(', '_String', '_.', '_format', '_(', '"', 'Un', 'balanced', '_pop', ':', '_expected', "_'%", 's', "'", '_but', '_encountered', "_'%", 's', "'", '"', ',', '_expected', '_.', '_get', 'Listener', 'Class', '_(', '_)', '_,', '_actual', '_)', '_)', '_;', '_}', '_}', '_}', '</s>']
02/17/2024 13:48:46 - INFO - __main__ - code_ids: 0 6 2 653 517 747 13125 2486 711 723 5012 1089 400 3916 517 999 2019 747 711 2048 743 399 9401 400 547 746 3325 743 399 1920 3916 517 999 2019 999 711 3780 385 547 746 3325 746 5012 400 743 2476 462 400 3780 620 2048 743 399 1185 579 16219 400 1167 746 2021 400 120 965 37707 5012 144 2048 3421 201 125 2107 17038 3421 201 125 120 130 2048 746 744 2486 1128 400 743 2019 3780 743 743 2476 425 425 425 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
02/17/2024 13:48:46 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'P', 'ops', '_the', '_top', '_event', '_off', '_the', '_current', '_event', '_stack', '_.', '_This', '_action', '_has', '_to', '_be', '_performed', '_immediately', '_after', '_the', '_event', '_has', '_been', '_dispatched', '_to', '_all', '_listeners', '_.', '</s>']
02/17/2024 13:48:46 - INFO - __main__ - nl_ids: 0 6 2 166 2489 448 3194 1488 3413 448 1434 1488 3325 746 1600 2657 1559 508 661 13181 10086 2493 448 1488 1559 3022 43340 508 1345 11839 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
02/17/2024 13:48:46 - INFO - __main__ - *** Example ***
02/17/2024 13:48:46 - INFO - __main__ - idx: 2
02/17/2024 13:48:46 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'protected', '_void', '_modify', '_(', '_Transaction', '_t', '_)', '_{', '_try', '_{', '_this', '_.', '_lock', '_.', '_write', 'Lock', '_(', '_)', '_.', '_lock', '_(', '_)', '_;', '_t', '_.', '_perform', '_(', '_)', '_;', '_}', '_finally', '_{', '_this', '_.', '_lock', '_.', '_write', 'Lock', '_(', '_)', '_.', '_unlock', '_(', '_)', '_;', '_}', '_}', '</s>']
02/17/2024 13:48:46 - INFO - __main__ - code_ids: 0 6 2 1933 723 8660 400 13081 422 743 399 1568 399 547 746 3505 746 2250 2896 400 743 746 3505 400 743 2476 422 746 4729 400 743 2476 425 6110 399 547 746 3505 746 2250 2896 400 743 746 14552 400 743 2476 425 425 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
02/17/2024 13:48:46 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Executes', '_the', '_given', '_transaction', '_within', '_the', '_con', 'text', 'of', '_a', '_write', '_lock', '_.', '</s>']
02/17/2024 13:48:46 - INFO - __main__ - nl_ids: 0 6 2 40551 448 2076 4993 5289 448 549 625 757 434 2250 3505 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
02/17/2024 13:48:46 - INFO - __main__ - ***** Running training *****
02/17/2024 13:48:46 - INFO - __main__ - Num examples = 164923
02/17/2024 13:48:46 - INFO - __main__ - Num Epochs = 5
02/17/2024 13:48:46 - INFO - __main__ - Num quene = 1024
02/17/2024 13:48:46 - INFO - __main__ - Instantaneous batch size per GPU = 128
02/17/2024 13:48:46 - INFO - __main__ - Total train batch size = 128
02/17/2024 13:48:46 - INFO - __main__ - Total optimization steps = 6440
Traceback (most recent call last):
File "run.py", line 1188, in <module>
main()
File "run.py", line 1154, in main
train(args, model, tokenizer, pool)
File "run.py", line 585, in train
code_vec = model(code_inputs=code_inputs)
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/yiming/cocosoda/CoCoSoDa/model.py", line 40, in forward
outputs = self.encoder(code_inputs,attention_mask=code_inputs.ne(1))[0] #[bs, seq_len, dim]
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1120, in _call_impl
result = forward_call(*input, **kwargs)
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/transformers/models/roberta/modeling_roberta.py", line 860, in forward
return_dict=return_dict,
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/transformers/models/roberta/modeling_roberta.py", line 531, in forward
output_attentions,
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/transformers/models/roberta/modeling_roberta.py", line 415, in forward
past_key_value=self_attn_past_key_value,
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/transformers/models/roberta/modeling_roberta.py", line 344, in forward
output_attentions,
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/transformers/models/roberta/modeling_roberta.py", line 267, in forward
attention_probs = self.dropout(attention_probs)
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
return forward_call(*input, **kwargs)
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/dropout.py", line 58, in forward
return F.dropout(input, self.p, self.training, self.inplace)
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/functional.py", line 1169, in dropout
return _VF.dropout_(input, p, training) if inplace else _VF.dropout(input, p, training)
RuntimeError: CUDA out of memory. Tried to allocate 384.00 MiB (GPU 0; 14.75 GiB total capacity; 12.96 GiB already allocated; 173.94 MiB free; 13.02 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
|