|
02/17/2024 13:45:37 - INFO - __main__ - device: cuda, n_gpu: 1 |
|
02/17/2024 13:45:41 - INFO - __main__ - +------------------------------------------------------------+--------------+----------+ |
|
| Layer Name | Output Shape | Param # | |
|
+------------------------------------------------------------+--------------+----------+ |
|
| encoder.embeddings.word_embeddings.weight | [51451, 768] | 39514368 | |
|
| encoder.embeddings.position_embeddings.weight | [1026, 768] | 787968 | |
|
| encoder.embeddings.token_type_embeddings.weight | [10, 768] | 7680 | |
|
| encoder.embeddings.LayerNorm.weight | [768] | 768 | |
|
| encoder.embeddings.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.0.attention.self.query.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.0.attention.self.query.bias | [768] | 768 | |
|
| encoder.encoder.layer.0.attention.self.key.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.0.attention.self.key.bias | [768] | 768 | |
|
| encoder.encoder.layer.0.attention.self.value.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.0.attention.self.value.bias | [768] | 768 | |
|
| encoder.encoder.layer.0.attention.output.dense.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.0.attention.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.0.attention.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.0.attention.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.0.intermediate.dense.weight | [3072, 768] | 2359296 | |
|
| encoder.encoder.layer.0.intermediate.dense.bias | [3072] | 3072 | |
|
| encoder.encoder.layer.0.output.dense.weight | [768, 3072] | 2359296 | |
|
| encoder.encoder.layer.0.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.0.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.0.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.1.attention.self.query.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.1.attention.self.query.bias | [768] | 768 | |
|
| encoder.encoder.layer.1.attention.self.key.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.1.attention.self.key.bias | [768] | 768 | |
|
| encoder.encoder.layer.1.attention.self.value.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.1.attention.self.value.bias | [768] | 768 | |
|
| encoder.encoder.layer.1.attention.output.dense.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.1.attention.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.1.attention.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.1.attention.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.1.intermediate.dense.weight | [3072, 768] | 2359296 | |
|
| encoder.encoder.layer.1.intermediate.dense.bias | [3072] | 3072 | |
|
| encoder.encoder.layer.1.output.dense.weight | [768, 3072] | 2359296 | |
|
| encoder.encoder.layer.1.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.1.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.1.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.2.attention.self.query.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.2.attention.self.query.bias | [768] | 768 | |
|
| encoder.encoder.layer.2.attention.self.key.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.2.attention.self.key.bias | [768] | 768 | |
|
| encoder.encoder.layer.2.attention.self.value.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.2.attention.self.value.bias | [768] | 768 | |
|
| encoder.encoder.layer.2.attention.output.dense.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.2.attention.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.2.attention.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.2.attention.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.2.intermediate.dense.weight | [3072, 768] | 2359296 | |
|
| encoder.encoder.layer.2.intermediate.dense.bias | [3072] | 3072 | |
|
| encoder.encoder.layer.2.output.dense.weight | [768, 3072] | 2359296 | |
|
| encoder.encoder.layer.2.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.2.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.2.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.3.attention.self.query.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.3.attention.self.query.bias | [768] | 768 | |
|
| encoder.encoder.layer.3.attention.self.key.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.3.attention.self.key.bias | [768] | 768 | |
|
| encoder.encoder.layer.3.attention.self.value.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.3.attention.self.value.bias | [768] | 768 | |
|
| encoder.encoder.layer.3.attention.output.dense.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.3.attention.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.3.attention.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.3.attention.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.3.intermediate.dense.weight | [3072, 768] | 2359296 | |
|
| encoder.encoder.layer.3.intermediate.dense.bias | [3072] | 3072 | |
|
| encoder.encoder.layer.3.output.dense.weight | [768, 3072] | 2359296 | |
|
| encoder.encoder.layer.3.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.3.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.3.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.4.attention.self.query.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.4.attention.self.query.bias | [768] | 768 | |
|
| encoder.encoder.layer.4.attention.self.key.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.4.attention.self.key.bias | [768] | 768 | |
|
| encoder.encoder.layer.4.attention.self.value.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.4.attention.self.value.bias | [768] | 768 | |
|
| encoder.encoder.layer.4.attention.output.dense.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.4.attention.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.4.attention.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.4.attention.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.4.intermediate.dense.weight | [3072, 768] | 2359296 | |
|
| encoder.encoder.layer.4.intermediate.dense.bias | [3072] | 3072 | |
|
| encoder.encoder.layer.4.output.dense.weight | [768, 3072] | 2359296 | |
|
| encoder.encoder.layer.4.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.4.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.4.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.5.attention.self.query.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.5.attention.self.query.bias | [768] | 768 | |
|
| encoder.encoder.layer.5.attention.self.key.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.5.attention.self.key.bias | [768] | 768 | |
|
| encoder.encoder.layer.5.attention.self.value.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.5.attention.self.value.bias | [768] | 768 | |
|
| encoder.encoder.layer.5.attention.output.dense.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.5.attention.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.5.attention.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.5.attention.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.5.intermediate.dense.weight | [3072, 768] | 2359296 | |
|
| encoder.encoder.layer.5.intermediate.dense.bias | [3072] | 3072 | |
|
| encoder.encoder.layer.5.output.dense.weight | [768, 3072] | 2359296 | |
|
| encoder.encoder.layer.5.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.5.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.5.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.6.attention.self.query.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.6.attention.self.query.bias | [768] | 768 | |
|
| encoder.encoder.layer.6.attention.self.key.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.6.attention.self.key.bias | [768] | 768 | |
|
| encoder.encoder.layer.6.attention.self.value.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.6.attention.self.value.bias | [768] | 768 | |
|
| encoder.encoder.layer.6.attention.output.dense.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.6.attention.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.6.attention.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.6.attention.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.6.intermediate.dense.weight | [3072, 768] | 2359296 | |
|
| encoder.encoder.layer.6.intermediate.dense.bias | [3072] | 3072 | |
|
| encoder.encoder.layer.6.output.dense.weight | [768, 3072] | 2359296 | |
|
| encoder.encoder.layer.6.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.6.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.6.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.7.attention.self.query.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.7.attention.self.query.bias | [768] | 768 | |
|
| encoder.encoder.layer.7.attention.self.key.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.7.attention.self.key.bias | [768] | 768 | |
|
| encoder.encoder.layer.7.attention.self.value.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.7.attention.self.value.bias | [768] | 768 | |
|
| encoder.encoder.layer.7.attention.output.dense.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.7.attention.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.7.attention.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.7.attention.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.7.intermediate.dense.weight | [3072, 768] | 2359296 | |
|
| encoder.encoder.layer.7.intermediate.dense.bias | [3072] | 3072 | |
|
| encoder.encoder.layer.7.output.dense.weight | [768, 3072] | 2359296 | |
|
| encoder.encoder.layer.7.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.7.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.7.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.8.attention.self.query.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.8.attention.self.query.bias | [768] | 768 | |
|
| encoder.encoder.layer.8.attention.self.key.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.8.attention.self.key.bias | [768] | 768 | |
|
| encoder.encoder.layer.8.attention.self.value.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.8.attention.self.value.bias | [768] | 768 | |
|
| encoder.encoder.layer.8.attention.output.dense.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.8.attention.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.8.attention.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.8.attention.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.8.intermediate.dense.weight | [3072, 768] | 2359296 | |
|
| encoder.encoder.layer.8.intermediate.dense.bias | [3072] | 3072 | |
|
| encoder.encoder.layer.8.output.dense.weight | [768, 3072] | 2359296 | |
|
| encoder.encoder.layer.8.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.8.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.8.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.9.attention.self.query.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.9.attention.self.query.bias | [768] | 768 | |
|
| encoder.encoder.layer.9.attention.self.key.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.9.attention.self.key.bias | [768] | 768 | |
|
| encoder.encoder.layer.9.attention.self.value.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.9.attention.self.value.bias | [768] | 768 | |
|
| encoder.encoder.layer.9.attention.output.dense.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.9.attention.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.9.attention.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.9.attention.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.9.intermediate.dense.weight | [3072, 768] | 2359296 | |
|
| encoder.encoder.layer.9.intermediate.dense.bias | [3072] | 3072 | |
|
| encoder.encoder.layer.9.output.dense.weight | [768, 3072] | 2359296 | |
|
| encoder.encoder.layer.9.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.9.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.9.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.10.attention.self.query.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.10.attention.self.query.bias | [768] | 768 | |
|
| encoder.encoder.layer.10.attention.self.key.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.10.attention.self.key.bias | [768] | 768 | |
|
| encoder.encoder.layer.10.attention.self.value.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.10.attention.self.value.bias | [768] | 768 | |
|
| encoder.encoder.layer.10.attention.output.dense.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.10.attention.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.10.attention.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.10.attention.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.10.intermediate.dense.weight | [3072, 768] | 2359296 | |
|
| encoder.encoder.layer.10.intermediate.dense.bias | [3072] | 3072 | |
|
| encoder.encoder.layer.10.output.dense.weight | [768, 3072] | 2359296 | |
|
| encoder.encoder.layer.10.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.10.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.10.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.11.attention.self.query.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.11.attention.self.query.bias | [768] | 768 | |
|
| encoder.encoder.layer.11.attention.self.key.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.11.attention.self.key.bias | [768] | 768 | |
|
| encoder.encoder.layer.11.attention.self.value.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.11.attention.self.value.bias | [768] | 768 | |
|
| encoder.encoder.layer.11.attention.output.dense.weight | [768, 768] | 589824 | |
|
| encoder.encoder.layer.11.attention.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.11.attention.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.11.attention.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.encoder.layer.11.intermediate.dense.weight | [3072, 768] | 2359296 | |
|
| encoder.encoder.layer.11.intermediate.dense.bias | [3072] | 3072 | |
|
| encoder.encoder.layer.11.output.dense.weight | [768, 3072] | 2359296 | |
|
| encoder.encoder.layer.11.output.dense.bias | [768] | 768 | |
|
| encoder.encoder.layer.11.output.LayerNorm.weight | [768] | 768 | |
|
| encoder.encoder.layer.11.output.LayerNorm.bias | [768] | 768 | |
|
| encoder.pooler.dense.weight | [768, 768] | 589824 | |
|
| encoder.pooler.dense.bias | [768] | 768 | |
|
+------------------------------------------------------------+--------------+----------+ |
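The Param # column follows directly from the listed output shapes: the count is the product of the dimensions. A minimal sketch of that check against a few representative rows from the table above (plain Python, nothing model-specific is assumed):

    from functools import reduce
    from operator import mul

    def n_params(shape):
        # Parameter count is the product of the tensor's dimensions.
        return reduce(mul, shape, 1)

    # Representative rows from the table above: shape -> expected "Param #".
    rows = {
        "encoder.embeddings.word_embeddings.weight": (51451, 768),          # 39514368
        "encoder.encoder.layer.0.attention.self.query.weight": (768, 768),  # 589824
        "encoder.encoder.layer.0.intermediate.dense.weight": (3072, 768),   # 2359296
        "encoder.encoder.layer.0.output.dense.bias": (768,),                # 768
    }

    for name, shape in rows.items():
        print(name, n_params(shape))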
|
02/17/2024 13:45:41 - INFO - __main__ - Training/evaluation parameters Namespace(agg_way='avg', aug_type_way='random_replace_type', code_length=256, codebase_file='dataset/java/codebase.jsonl', config_name='DeepSoftwareAnalytics/CoCoSoDa', couninue_pre_train_data_files=['dataset/ruby/train.jsonl', 'dataset/java/train.jsonl'], data_aug_type='random_mask', data_flow_length=0, debug=False, device=device(type='cuda'), do_avg=False, do_continue_pre_trained=False, do_eval=False, do_fine_tune=False, do_ineer_loss=False, do_multi_lang_continue_pre_train=False, do_single_lang_continue_pre_train=False, do_test=True, do_train=True, do_whitening=False, do_zero_short=False, epoch=50, eval_batch_size=64, eval_data_file='dataset/java/valid.jsonl', eval_frequency=100, fp16=False, gradient_accumulation_steps=1, hidden_size=768, lang='java', learning_rate=2e-05, loaded_codebert_model_filename=None, loaded_model_filename=None, local_rank=-1, logging_steps=50, max_codeblock_num=10, max_grad_norm=1.0, max_steps=100, mlm_probability=0.1, mlp=False, moco_dim=768, moco_k=1024, moco_m=0.999, moco_t=0.07, moco_type='encoder_queue', model_name_or_path='DeepSoftwareAnalytics/CoCoSoDa', model_type='base', n_debug_samples=100, n_gpu=1, nl_length=128, num_train_epochs=5, num_warmup_steps=0, only_save_the_nl_code_vec=False, output_dir='./saved_models/fine_tune/java', print_align_unif_loss=False, save_evaluation_reuslt=False, save_evaluation_reuslt_dir=None, save_steps=50, seed=123456, test_data_file='dataset/java/test.jsonl', time_score=1, tokenizer_name='DeepSoftwareAnalytics/CoCoSoDa', train_batch_size=128, train_data_file='dataset/java/train.jsonl', use_best_mrr_model=False, weight_decay=0.01) |
|
02/17/2024 13:48:46 - INFO - __main__ - *** Example *** |
|
02/17/2024 13:48:46 - INFO - __main__ - idx: 0 |
|
02/17/2024 13:48:46 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', '@', '_Override', '_public', '_Image', 'Source', '_apply', '_(', '_Image', 'Source', '_input', '_)', '_{', '_final', '_int', '_[', '_]', '_[', '_]', '_pixel', 'Matrix', '_=', '_new', '_int', '_[', '_3', '_]', '_[', '_3', '_]', '_;', '_int', '_w', '_=', '_input', '_.', '_getWidth', '_(', '_)', '_;', '_int', '_h', '_=', '_input', '_.', '_getHeight', '_(', '_)', '_;', '_int', '_[', '_]', '_[', '_]', '_output', '_=', '_new', '_int', '_[', '_h', '_]', '_[', '_w', '_]', '_;', '_for', '_(', '_int', '_j', '_=', '_1', '_;', '_j', '_<', '_h', '_-', '_1', '_;', '_j', '_++', '_)', '_{', '_for', '_(', '_int', '_i', '_=', '_1', '_;', '_i', '_<', '_w', '_-', '_1', '_;', '_i', '_++', '_)', '_{', '_pixel', 'Matrix', '_[', '_0', '_]', '_[', '_0', '_]', '_=', '_input', '_.', '_get', 'R', '_(', '_i', '_-', '_1', '_,', '_j', '_-', '_1', '_)', '_;', '_pixel', 'Matrix', '_[', '_0', '_]', '_[', '_1', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_-', '_1', '_,', '_j', '_)', '_;', '_pixel', 'Matrix', '_[', '_0', '_]', '_[', '_2', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_-', '_1', '_,', '_j', '_+', '_1', '_)', '_;', '_pixel', 'Matrix', '_[', '_1', '_]', '_[', '_0', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_,', '_j', '_-', '_1', '_)', '_;', '_pixel', 'Matrix', '_[', '_1', '_]', '_[', '_2', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_,', '_j', '_+', '_1', '_)', '_;', '_pixel', 'Matrix', '_[', '_2', '_]', '_[', '_0', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_+', '_1', '_,', '_j', '_-', '_1', '_)', '_;', '_pixel', 'Matrix', '_[', '_2', '_]', '_[', '_1', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_+', '_1', '_,', '_j', '_)', '_;', '_pixel', '</s>'] |
|
02/17/2024 13:48:46 - INFO - __main__ - code_ids: 0 6 2 150 19505 1240 6085 1768 5230 400 6085 1768 1586 743 399 1920 554 626 2406 626 2406 5578 3679 385 579 554 626 995 2406 626 995 2406 2476 554 477 385 1586 746 32671 400 743 2476 554 566 385 1586 746 32720 400 743 2476 554 626 2406 626 2406 1721 385 579 554 626 566 2406 626 477 2406 2476 563 400 554 913 385 524 2476 913 517 566 581 524 2476 913 1932 743 399 563 400 554 548 385 524 2476 548 517 477 581 524 2476 548 1932 743 399 5578 3679 626 461 2406 626 461 2406 385 1586 746 744 168 400 548 581 524 2019 913 581 524 743 2476 5578 3679 626 461 2406 626 524 2406 385 1586 746 744 7664 400 548 581 524 2019 913 743 2476 5578 3679 626 461 2406 626 688 2406 385 1586 746 744 7664 400 548 581 524 2019 913 513 524 743 2476 5578 3679 626 524 2406 626 461 2406 385 1586 746 744 7664 400 548 2019 913 581 524 743 2476 5578 3679 626 524 2406 626 688 2406 385 1586 746 744 7664 400 548 2019 913 513 524 743 2476 5578 3679 626 688 2406 626 461 2406 385 1586 746 744 7664 400 548 513 524 2019 913 581 524 743 2476 5578 3679 626 688 2406 626 524 2406 385 1586 746 744 7664 400 548 513 524 2019 913 743 2476 5578 2 |
|
02/17/2024 13:48:46 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Expect', 's', '_a', '_height', '_mat', '_as', '_input', '</s>'] |
|
02/17/2024 13:48:46 - INFO - __main__ - nl_ids: 0 6 2 7871 201 434 3082 5772 880 1586 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 |
|
02/17/2024 13:48:46 - INFO - __main__ - *** Example *** |
|
02/17/2024 13:48:46 - INFO - __main__ - idx: 1 |
|
02/17/2024 13:48:46 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'public', '_<', '_L', 'extends', 'Listener', '_>', '_void', '_pop', 'Event', '_(', '_Event', '_<', '_?', '_,', '_L', '_>', '_expected', '_)', '_{', '_synchronized', '_(', '_this', '_.', '_stack', '_)', '_{', '_final', '_Event', '_<', '_?', '_,', '_?', '_>', '_actual', '_=', '_this', '_.', '_stack', '_.', '_pop', '_(', '_)', '_;', '_if', '_(', '_actual', '_!=', '_expected', '_)', '_{', '_throw', '_new', '_IllegalStateException', '_(', '_String', '_.', '_format', '_(', '"', 'Un', 'balanced', '_pop', ':', '_expected', "_'%", 's', "'", '_but', '_encountered', "_'%", 's', "'", '"', ',', '_expected', '_.', '_get', 'Listener', 'Class', '_(', '_)', '_,', '_actual', '_)', '_)', '_;', '_}', '_}', '_}', '</s>'] |
|
02/17/2024 13:48:46 - INFO - __main__ - code_ids: 0 6 2 653 517 747 13125 2486 711 723 5012 1089 400 3916 517 999 2019 747 711 2048 743 399 9401 400 547 746 3325 743 399 1920 3916 517 999 2019 999 711 3780 385 547 746 3325 746 5012 400 743 2476 462 400 3780 620 2048 743 399 1185 579 16219 400 1167 746 2021 400 120 965 37707 5012 144 2048 3421 201 125 2107 17038 3421 201 125 120 130 2048 746 744 2486 1128 400 743 2019 3780 743 743 2476 425 425 425 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 |
|
02/17/2024 13:48:46 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'P', 'ops', '_the', '_top', '_event', '_off', '_the', '_current', '_event', '_stack', '_.', '_This', '_action', '_has', '_to', '_be', '_performed', '_immediately', '_after', '_the', '_event', '_has', '_been', '_dispatched', '_to', '_all', '_listeners', '_.', '</s>'] |
|
02/17/2024 13:48:46 - INFO - __main__ - nl_ids: 0 6 2 166 2489 448 3194 1488 3413 448 1434 1488 3325 746 1600 2657 1559 508 661 13181 10086 2493 448 1488 1559 3022 43340 508 1345 11839 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 |
|
02/17/2024 13:48:46 - INFO - __main__ - *** Example *** |
|
02/17/2024 13:48:46 - INFO - __main__ - idx: 2 |
|
02/17/2024 13:48:46 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'protected', '_void', '_modify', '_(', '_Transaction', '_t', '_)', '_{', '_try', '_{', '_this', '_.', '_lock', '_.', '_write', 'Lock', '_(', '_)', '_.', '_lock', '_(', '_)', '_;', '_t', '_.', '_perform', '_(', '_)', '_;', '_}', '_finally', '_{', '_this', '_.', '_lock', '_.', '_write', 'Lock', '_(', '_)', '_.', '_unlock', '_(', '_)', '_;', '_}', '_}', '</s>'] |
|
02/17/2024 13:48:46 - INFO - __main__ - code_ids: 0 6 2 1933 723 8660 400 13081 422 743 399 1568 399 547 746 3505 746 2250 2896 400 743 746 3505 400 743 2476 422 746 4729 400 743 2476 425 6110 399 547 746 3505 746 2250 2896 400 743 746 14552 400 743 2476 425 425 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 |
|
02/17/2024 13:48:46 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Executes', '_the', '_given', '_transaction', '_within', '_the', '_con', 'text', 'of', '_a', '_write', '_lock', '_.', '</s>'] |
|
02/17/2024 13:48:46 - INFO - __main__ - nl_ids: 0 6 2 40551 448 2076 4993 5289 448 549 625 757 434 2250 3505 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 |
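In the examples above, sequences shorter than nl_length (128) or code_length (256) are right-padded with token id 1, and the traceback further down shows model.py building the attention mask as code_inputs.ne(1), i.e. "attend to everything that is not padding". A minimal sketch of that relationship, using a truncated prefix of the nl_ids from example idx 2 (shortened here purely for illustration):

    import torch

    # Truncated prefix of the nl_ids from example idx 2, right-padded with id 1.
    nl_ids = torch.tensor([0, 6, 2, 40551, 448, 2076, 4993, 5289, 1, 1, 1, 1])

    # model.py (line 40 in the traceback below) derives the mask the same way.
    attention_mask = nl_ids.ne(1)
    print(attention_mask.int().tolist())
    # -> [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0]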
|
02/17/2024 13:48:46 - INFO - __main__ - ***** Running training ***** |
|
02/17/2024 13:48:46 - INFO - __main__ - Num examples = 164923 |
|
02/17/2024 13:48:46 - INFO - __main__ - Num Epochs = 5 |
|
02/17/2024 13:48:46 - INFO - __main__ - Num queue = 1024 |
|
02/17/2024 13:48:46 - INFO - __main__ - Instantaneous batch size per GPU = 128 |
|
02/17/2024 13:48:46 - INFO - __main__ - Total train batch size = 128 |
|
02/17/2024 13:48:46 - INFO - __main__ - Total optimization steps = 6440 |
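For reference, the reported step count is consistent with the numbers above, assuming one optimizer step per batch and the last partial batch being dropped:

    num_examples = 164923   # "Num examples" above
    batch_size = 128        # "Total train batch size" above
    num_epochs = 5          # "Num Epochs" above

    steps_per_epoch = num_examples // batch_size   # 1288, partial last batch dropped
    total_steps = steps_per_epoch * num_epochs     # 6440, matching the log line above
    print(total_steps)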
|
Traceback (most recent call last):
  File "run.py", line 1188, in <module>
    main()
  File "run.py", line 1154, in main
    train(args, model, tokenizer, pool)
  File "run.py", line 585, in train
    code_vec = model(code_inputs=code_inputs)
  File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/yiming/cocosoda/CoCoSoDa/model.py", line 40, in forward
    outputs = self.encoder(code_inputs,attention_mask=code_inputs.ne(1))[0]
  File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1120, in _call_impl
    result = forward_call(*input, **kwargs)
  File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/transformers/models/roberta/modeling_roberta.py", line 860, in forward
    return_dict=return_dict,
  File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/transformers/models/roberta/modeling_roberta.py", line 531, in forward
    output_attentions,
  File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/transformers/models/roberta/modeling_roberta.py", line 415, in forward
    past_key_value=self_attn_past_key_value,
  File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/transformers/models/roberta/modeling_roberta.py", line 344, in forward
    output_attentions,
  File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/transformers/models/roberta/modeling_roberta.py", line 267, in forward
    attention_probs = self.dropout(attention_probs)
  File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/dropout.py", line 58, in forward
    return F.dropout(input, self.p, self.training, self.inplace)
  File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/functional.py", line 1169, in dropout
    return _VF.dropout_(input, p, training) if inplace else _VF.dropout(input, p, training)
RuntimeError: CUDA out of memory. Tried to allocate 384.00 MiB (GPU 0; 14.75 GiB total capacity; 12.96 GiB already allocated; 173.94 MiB free; 13.02 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
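The crash happens in the forward pass of the first training batch, inside the dropout applied to the attention probabilities. The failed 384.00 MiB allocation matches one layer's attention-probability tensor for this configuration, assuming the standard 12 heads of a 768-hidden RoBERTa encoder: 128 x 12 x 256 x 256 float32 values = 402,653,184 bytes = 384.00 MiB. A minimal sketch of the usual mitigations follows; it assumes the --train_batch_size and --gradient_accumulation_steps flags from the Namespace above behave as their names suggest, the concrete values below are hypothetical rather than taken from this run, and the allocator hint is the one the error message itself recommends:

    import os

    # Allocator hint from the error message; must be set before CUDA is first initialised.
    os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "max_split_size_mb:128")

    # Hypothetical re-run: keep the effective batch size of 128, but feed it in
    # smaller chunks so fewer attention activations are alive at the same time.
    EFFECTIVE_BATCH = 128
    PER_STEP_BATCH = 32                              # pass as --train_batch_size
    ACCUM_STEPS = EFFECTIVE_BATCH // PER_STEP_BATCH  # pass as --gradient_accumulation_steps (4)
    assert PER_STEP_BATCH * ACCUM_STEPS == EFFECTIVE_BATCH

    print("--train_batch_size {} --gradient_accumulation_steps {}".format(PER_STEP_BATCH, ACCUM_STEPS))

Note that with an in-batch contrastive loss a smaller per-step batch also means fewer negatives per step, so this is not strictly equivalent to a true batch of 128; enabling the --fp16 flag already present in the Namespace (currently False) is another common lever, provided the script's mixed-precision path works in this environment.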