SalazarPevelll
commited on
Commit
•
d5fa9ad
1
Parent(s):
f1cc81a
td
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- saved_models/cocosoda/save_tokenizer.log +347 -0
- saved_models/codesearch_contrastive_learning/Model/Epoch_1/base_dvi.pth +3 -0
- saved_models/codesearch_contrastive_learning/Model/Epoch_1/bgimg.png +0 -0
- saved_models/codesearch_contrastive_learning/Model/Epoch_1/embedding.npy +3 -0
- saved_models/codesearch_contrastive_learning/Model/Epoch_1/index.json +0 -0
- saved_models/codesearch_contrastive_learning/Model/Epoch_1/scale.npy +3 -0
- saved_models/codesearch_contrastive_learning/Model/Epoch_1/subject_model.pth +3 -0
- saved_models/codesearch_contrastive_learning/Model/Epoch_1/test_data.npy +3 -0
- saved_models/codesearch_contrastive_learning/Model/Epoch_1/train_data.npy +3 -0
- saved_models/codesearch_contrastive_learning/Model/Epoch_2/base_dvi.pth +3 -0
- saved_models/codesearch_contrastive_learning/Model/Epoch_2/bgimg.png +0 -0
- saved_models/codesearch_contrastive_learning/Model/Epoch_2/embedding.npy +3 -0
- saved_models/codesearch_contrastive_learning/Model/Epoch_2/index.json +0 -0
- saved_models/codesearch_contrastive_learning/Model/Epoch_2/scale.npy +3 -0
- saved_models/codesearch_contrastive_learning/Model/Epoch_2/subject_model.pth +3 -0
- saved_models/codesearch_contrastive_learning/Model/Epoch_2/test_data.npy +3 -0
- saved_models/codesearch_contrastive_learning/Model/Epoch_2/train_data.npy +3 -0
- saved_models/codesearch_contrastive_learning/Model/__pycache__/model.cpython-37.pyc +0 -0
- saved_models/codesearch_contrastive_learning/Model/__pycache__/model.cpython-38.pyc +0 -0
- saved_models/codesearch_contrastive_learning/Model/model-cs.py +396 -0
- saved_models/codesearch_contrastive_learning/Model/model.py +453 -0
- saved_models/codesearch_contrastive_learning/Model/time_base_dvi.json +1 -0
- saved_models/codesearch_contrastive_learning/Testing_data/testing_dataset_label.pth +3 -0
- saved_models/codesearch_contrastive_learning/Training_data/training_dataset_label.pth +3 -0
- saved_models/codesearch_contrastive_learning/config.json +101 -0
- saved_models/codesearch_contrastive_learning/config_dvi_modi.json +55 -0
- saved_models/codesearch_contrastive_learning/iteration_structure.json +12 -0
- saved_models/fine_tune/Ruby/running.log +215 -0
- saved_models/fine_tune/java/running.log +268 -0
- saved_models/fine_tune/ruby/0/model.bin +3 -0
- saved_models/fine_tune/ruby/1/all_code_vec.npy +3 -0
- saved_models/fine_tune/ruby/1/all_nl_vec.npy +3 -0
- saved_models/fine_tune/ruby/1/model.bin +3 -0
- saved_models/fine_tune/ruby/1/test_all_code_vec.npy +3 -0
- saved_models/fine_tune/ruby/1/test_all_nl_vec.npy +3 -0
- saved_models/fine_tune/ruby/2/all_code_vec.npy +3 -0
- saved_models/fine_tune/ruby/2/all_nl_vec.npy +3 -0
- saved_models/fine_tune/ruby/2/model.bin +3 -0
- saved_models/fine_tune/ruby/2/test_all_code_vec.npy +3 -0
- saved_models/fine_tune/ruby/2/test_all_nl_vec.npy +3 -0
- saved_models/fine_tune/ruby/3/all_code_vec.npy +3 -0
- saved_models/fine_tune/ruby/3/all_nl_vec.npy +3 -0
- saved_models/fine_tune/ruby/3/model.bin +3 -0
- saved_models/fine_tune/ruby/3/test_all_code_vec.npy +3 -0
- saved_models/fine_tune/ruby/3/test_all_nl_vec.npy +3 -0
- saved_models/fine_tune/ruby/4/model.bin +3 -0
- saved_models/fine_tune/ruby/checkpoint-best-mrr/model.bin +3 -0
- saved_models/fine_tune/ruby/docstring_list.json +0 -0
- saved_models/fine_tune/ruby/result.jsonl +1 -0
- saved_models/fine_tune/ruby/running.log +5 -0
saved_models/cocosoda/save_tokenizer.log
ADDED
@@ -0,0 +1,347 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
02/17/2024 15:12:04 - INFO - __main__ - device: cuda, n_gpu: 2
|
2 |
+
02/17/2024 15:12:08 - INFO - __main__ - new token {'additional_special_tokens': ['global_variable', 'heredoc_end', 'decimal_floating_point_literal', 'rune_literal', 'int_literal', 'ERROR', 'class', 'heredoc_content', 'field_identifier', 'name', 'string', 'hash_key_symbol', 'hex_integer_literal', 'statement_identifier', 'boolean', 'separators', 'escape_sequence', 'boolean_type', 'regex_flags', 'string_fragment', 'identifier', 'instance_variable', 'regex_pattern', 'decimal_integer_literal', 'raw_string_literal', 'property_identifier', 'operator', 'label_name', 'namespace', 'string_literal', 'package_identifier', 'float_literal', 'integer', 'php_tag', 'shorthand_property_identifier', 'shorthand_property_identifier_pattern', 'extends', 'none', 'text', 'void_type', 'null_literal', 'heredoc_beginning', 'keyword', 'simple_symbol', 'type_identifier', 'character_literal', 'string_content', 'comment', 'number', '"', 'constant', 'class_variable']}
|
3 |
+
02/17/2024 15:12:09 - INFO - __main__ - +-------------------------------------------------------------------+--------------+----------+
|
4 |
+
| Layer Name | Output Shape | Param # |
|
5 |
+
+-------------------------------------------------------------------+--------------+----------+
|
6 |
+
| code_encoder_q.embeddings.word_embeddings.weight | [51451, 768] | 39514368 |
|
7 |
+
| code_encoder_q.embeddings.position_embeddings.weight | [1026, 768] | 787968 |
|
8 |
+
| code_encoder_q.embeddings.token_type_embeddings.weight | [10, 768] | 7680 |
|
9 |
+
| code_encoder_q.embeddings.LayerNorm.weight | [768] | 768 |
|
10 |
+
| code_encoder_q.embeddings.LayerNorm.bias | [768] | 768 |
|
11 |
+
| code_encoder_q.encoder.layer.0.attention.self.query.weight | [768, 768] | 589824 |
|
12 |
+
| code_encoder_q.encoder.layer.0.attention.self.query.bias | [768] | 768 |
|
13 |
+
| code_encoder_q.encoder.layer.0.attention.self.key.weight | [768, 768] | 589824 |
|
14 |
+
| code_encoder_q.encoder.layer.0.attention.self.key.bias | [768] | 768 |
|
15 |
+
| code_encoder_q.encoder.layer.0.attention.self.value.weight | [768, 768] | 589824 |
|
16 |
+
| code_encoder_q.encoder.layer.0.attention.self.value.bias | [768] | 768 |
|
17 |
+
| code_encoder_q.encoder.layer.0.attention.output.dense.weight | [768, 768] | 589824 |
|
18 |
+
| code_encoder_q.encoder.layer.0.attention.output.dense.bias | [768] | 768 |
|
19 |
+
| code_encoder_q.encoder.layer.0.attention.output.LayerNorm.weight | [768] | 768 |
|
20 |
+
| code_encoder_q.encoder.layer.0.attention.output.LayerNorm.bias | [768] | 768 |
|
21 |
+
| code_encoder_q.encoder.layer.0.intermediate.dense.weight | [3072, 768] | 2359296 |
|
22 |
+
| code_encoder_q.encoder.layer.0.intermediate.dense.bias | [3072] | 3072 |
|
23 |
+
| code_encoder_q.encoder.layer.0.output.dense.weight | [768, 3072] | 2359296 |
|
24 |
+
| code_encoder_q.encoder.layer.0.output.dense.bias | [768] | 768 |
|
25 |
+
| code_encoder_q.encoder.layer.0.output.LayerNorm.weight | [768] | 768 |
|
26 |
+
| code_encoder_q.encoder.layer.0.output.LayerNorm.bias | [768] | 768 |
|
27 |
+
| code_encoder_q.encoder.layer.1.attention.self.query.weight | [768, 768] | 589824 |
|
28 |
+
| code_encoder_q.encoder.layer.1.attention.self.query.bias | [768] | 768 |
|
29 |
+
| code_encoder_q.encoder.layer.1.attention.self.key.weight | [768, 768] | 589824 |
|
30 |
+
| code_encoder_q.encoder.layer.1.attention.self.key.bias | [768] | 768 |
|
31 |
+
| code_encoder_q.encoder.layer.1.attention.self.value.weight | [768, 768] | 589824 |
|
32 |
+
| code_encoder_q.encoder.layer.1.attention.self.value.bias | [768] | 768 |
|
33 |
+
| code_encoder_q.encoder.layer.1.attention.output.dense.weight | [768, 768] | 589824 |
|
34 |
+
| code_encoder_q.encoder.layer.1.attention.output.dense.bias | [768] | 768 |
|
35 |
+
| code_encoder_q.encoder.layer.1.attention.output.LayerNorm.weight | [768] | 768 |
|
36 |
+
| code_encoder_q.encoder.layer.1.attention.output.LayerNorm.bias | [768] | 768 |
|
37 |
+
| code_encoder_q.encoder.layer.1.intermediate.dense.weight | [3072, 768] | 2359296 |
|
38 |
+
| code_encoder_q.encoder.layer.1.intermediate.dense.bias | [3072] | 3072 |
|
39 |
+
| code_encoder_q.encoder.layer.1.output.dense.weight | [768, 3072] | 2359296 |
|
40 |
+
| code_encoder_q.encoder.layer.1.output.dense.bias | [768] | 768 |
|
41 |
+
| code_encoder_q.encoder.layer.1.output.LayerNorm.weight | [768] | 768 |
|
42 |
+
| code_encoder_q.encoder.layer.1.output.LayerNorm.bias | [768] | 768 |
|
43 |
+
| code_encoder_q.encoder.layer.2.attention.self.query.weight | [768, 768] | 589824 |
|
44 |
+
| code_encoder_q.encoder.layer.2.attention.self.query.bias | [768] | 768 |
|
45 |
+
| code_encoder_q.encoder.layer.2.attention.self.key.weight | [768, 768] | 589824 |
|
46 |
+
| code_encoder_q.encoder.layer.2.attention.self.key.bias | [768] | 768 |
|
47 |
+
| code_encoder_q.encoder.layer.2.attention.self.value.weight | [768, 768] | 589824 |
|
48 |
+
| code_encoder_q.encoder.layer.2.attention.self.value.bias | [768] | 768 |
|
49 |
+
| code_encoder_q.encoder.layer.2.attention.output.dense.weight | [768, 768] | 589824 |
|
50 |
+
| code_encoder_q.encoder.layer.2.attention.output.dense.bias | [768] | 768 |
|
51 |
+
| code_encoder_q.encoder.layer.2.attention.output.LayerNorm.weight | [768] | 768 |
|
52 |
+
| code_encoder_q.encoder.layer.2.attention.output.LayerNorm.bias | [768] | 768 |
|
53 |
+
| code_encoder_q.encoder.layer.2.intermediate.dense.weight | [3072, 768] | 2359296 |
|
54 |
+
| code_encoder_q.encoder.layer.2.intermediate.dense.bias | [3072] | 3072 |
|
55 |
+
| code_encoder_q.encoder.layer.2.output.dense.weight | [768, 3072] | 2359296 |
|
56 |
+
| code_encoder_q.encoder.layer.2.output.dense.bias | [768] | 768 |
|
57 |
+
| code_encoder_q.encoder.layer.2.output.LayerNorm.weight | [768] | 768 |
|
58 |
+
| code_encoder_q.encoder.layer.2.output.LayerNorm.bias | [768] | 768 |
|
59 |
+
| code_encoder_q.encoder.layer.3.attention.self.query.weight | [768, 768] | 589824 |
|
60 |
+
| code_encoder_q.encoder.layer.3.attention.self.query.bias | [768] | 768 |
|
61 |
+
| code_encoder_q.encoder.layer.3.attention.self.key.weight | [768, 768] | 589824 |
|
62 |
+
| code_encoder_q.encoder.layer.3.attention.self.key.bias | [768] | 768 |
|
63 |
+
| code_encoder_q.encoder.layer.3.attention.self.value.weight | [768, 768] | 589824 |
|
64 |
+
| code_encoder_q.encoder.layer.3.attention.self.value.bias | [768] | 768 |
|
65 |
+
| code_encoder_q.encoder.layer.3.attention.output.dense.weight | [768, 768] | 589824 |
|
66 |
+
| code_encoder_q.encoder.layer.3.attention.output.dense.bias | [768] | 768 |
|
67 |
+
| code_encoder_q.encoder.layer.3.attention.output.LayerNorm.weight | [768] | 768 |
|
68 |
+
| code_encoder_q.encoder.layer.3.attention.output.LayerNorm.bias | [768] | 768 |
|
69 |
+
| code_encoder_q.encoder.layer.3.intermediate.dense.weight | [3072, 768] | 2359296 |
|
70 |
+
| code_encoder_q.encoder.layer.3.intermediate.dense.bias | [3072] | 3072 |
|
71 |
+
| code_encoder_q.encoder.layer.3.output.dense.weight | [768, 3072] | 2359296 |
|
72 |
+
| code_encoder_q.encoder.layer.3.output.dense.bias | [768] | 768 |
|
73 |
+
| code_encoder_q.encoder.layer.3.output.LayerNorm.weight | [768] | 768 |
|
74 |
+
| code_encoder_q.encoder.layer.3.output.LayerNorm.bias | [768] | 768 |
|
75 |
+
| code_encoder_q.encoder.layer.4.attention.self.query.weight | [768, 768] | 589824 |
|
76 |
+
| code_encoder_q.encoder.layer.4.attention.self.query.bias | [768] | 768 |
|
77 |
+
| code_encoder_q.encoder.layer.4.attention.self.key.weight | [768, 768] | 589824 |
|
78 |
+
| code_encoder_q.encoder.layer.4.attention.self.key.bias | [768] | 768 |
|
79 |
+
| code_encoder_q.encoder.layer.4.attention.self.value.weight | [768, 768] | 589824 |
|
80 |
+
| code_encoder_q.encoder.layer.4.attention.self.value.bias | [768] | 768 |
|
81 |
+
| code_encoder_q.encoder.layer.4.attention.output.dense.weight | [768, 768] | 589824 |
|
82 |
+
| code_encoder_q.encoder.layer.4.attention.output.dense.bias | [768] | 768 |
|
83 |
+
| code_encoder_q.encoder.layer.4.attention.output.LayerNorm.weight | [768] | 768 |
|
84 |
+
| code_encoder_q.encoder.layer.4.attention.output.LayerNorm.bias | [768] | 768 |
|
85 |
+
| code_encoder_q.encoder.layer.4.intermediate.dense.weight | [3072, 768] | 2359296 |
|
86 |
+
| code_encoder_q.encoder.layer.4.intermediate.dense.bias | [3072] | 3072 |
|
87 |
+
| code_encoder_q.encoder.layer.4.output.dense.weight | [768, 3072] | 2359296 |
|
88 |
+
| code_encoder_q.encoder.layer.4.output.dense.bias | [768] | 768 |
|
89 |
+
| code_encoder_q.encoder.layer.4.output.LayerNorm.weight | [768] | 768 |
|
90 |
+
| code_encoder_q.encoder.layer.4.output.LayerNorm.bias | [768] | 768 |
|
91 |
+
| code_encoder_q.encoder.layer.5.attention.self.query.weight | [768, 768] | 589824 |
|
92 |
+
| code_encoder_q.encoder.layer.5.attention.self.query.bias | [768] | 768 |
|
93 |
+
| code_encoder_q.encoder.layer.5.attention.self.key.weight | [768, 768] | 589824 |
|
94 |
+
| code_encoder_q.encoder.layer.5.attention.self.key.bias | [768] | 768 |
|
95 |
+
| code_encoder_q.encoder.layer.5.attention.self.value.weight | [768, 768] | 589824 |
|
96 |
+
| code_encoder_q.encoder.layer.5.attention.self.value.bias | [768] | 768 |
|
97 |
+
| code_encoder_q.encoder.layer.5.attention.output.dense.weight | [768, 768] | 589824 |
|
98 |
+
| code_encoder_q.encoder.layer.5.attention.output.dense.bias | [768] | 768 |
|
99 |
+
| code_encoder_q.encoder.layer.5.attention.output.LayerNorm.weight | [768] | 768 |
|
100 |
+
| code_encoder_q.encoder.layer.5.attention.output.LayerNorm.bias | [768] | 768 |
|
101 |
+
| code_encoder_q.encoder.layer.5.intermediate.dense.weight | [3072, 768] | 2359296 |
|
102 |
+
| code_encoder_q.encoder.layer.5.intermediate.dense.bias | [3072] | 3072 |
|
103 |
+
| code_encoder_q.encoder.layer.5.output.dense.weight | [768, 3072] | 2359296 |
|
104 |
+
| code_encoder_q.encoder.layer.5.output.dense.bias | [768] | 768 |
|
105 |
+
| code_encoder_q.encoder.layer.5.output.LayerNorm.weight | [768] | 768 |
|
106 |
+
| code_encoder_q.encoder.layer.5.output.LayerNorm.bias | [768] | 768 |
|
107 |
+
| code_encoder_q.encoder.layer.6.attention.self.query.weight | [768, 768] | 589824 |
|
108 |
+
| code_encoder_q.encoder.layer.6.attention.self.query.bias | [768] | 768 |
|
109 |
+
| code_encoder_q.encoder.layer.6.attention.self.key.weight | [768, 768] | 589824 |
|
110 |
+
| code_encoder_q.encoder.layer.6.attention.self.key.bias | [768] | 768 |
|
111 |
+
| code_encoder_q.encoder.layer.6.attention.self.value.weight | [768, 768] | 589824 |
|
112 |
+
| code_encoder_q.encoder.layer.6.attention.self.value.bias | [768] | 768 |
|
113 |
+
| code_encoder_q.encoder.layer.6.attention.output.dense.weight | [768, 768] | 589824 |
|
114 |
+
| code_encoder_q.encoder.layer.6.attention.output.dense.bias | [768] | 768 |
|
115 |
+
| code_encoder_q.encoder.layer.6.attention.output.LayerNorm.weight | [768] | 768 |
|
116 |
+
| code_encoder_q.encoder.layer.6.attention.output.LayerNorm.bias | [768] | 768 |
|
117 |
+
| code_encoder_q.encoder.layer.6.intermediate.dense.weight | [3072, 768] | 2359296 |
|
118 |
+
| code_encoder_q.encoder.layer.6.intermediate.dense.bias | [3072] | 3072 |
|
119 |
+
| code_encoder_q.encoder.layer.6.output.dense.weight | [768, 3072] | 2359296 |
|
120 |
+
| code_encoder_q.encoder.layer.6.output.dense.bias | [768] | 768 |
|
121 |
+
| code_encoder_q.encoder.layer.6.output.LayerNorm.weight | [768] | 768 |
|
122 |
+
| code_encoder_q.encoder.layer.6.output.LayerNorm.bias | [768] | 768 |
|
123 |
+
| code_encoder_q.encoder.layer.7.attention.self.query.weight | [768, 768] | 589824 |
|
124 |
+
| code_encoder_q.encoder.layer.7.attention.self.query.bias | [768] | 768 |
|
125 |
+
| code_encoder_q.encoder.layer.7.attention.self.key.weight | [768, 768] | 589824 |
|
126 |
+
| code_encoder_q.encoder.layer.7.attention.self.key.bias | [768] | 768 |
|
127 |
+
| code_encoder_q.encoder.layer.7.attention.self.value.weight | [768, 768] | 589824 |
|
128 |
+
| code_encoder_q.encoder.layer.7.attention.self.value.bias | [768] | 768 |
|
129 |
+
| code_encoder_q.encoder.layer.7.attention.output.dense.weight | [768, 768] | 589824 |
|
130 |
+
| code_encoder_q.encoder.layer.7.attention.output.dense.bias | [768] | 768 |
|
131 |
+
| code_encoder_q.encoder.layer.7.attention.output.LayerNorm.weight | [768] | 768 |
|
132 |
+
| code_encoder_q.encoder.layer.7.attention.output.LayerNorm.bias | [768] | 768 |
|
133 |
+
| code_encoder_q.encoder.layer.7.intermediate.dense.weight | [3072, 768] | 2359296 |
|
134 |
+
| code_encoder_q.encoder.layer.7.intermediate.dense.bias | [3072] | 3072 |
|
135 |
+
| code_encoder_q.encoder.layer.7.output.dense.weight | [768, 3072] | 2359296 |
|
136 |
+
| code_encoder_q.encoder.layer.7.output.dense.bias | [768] | 768 |
|
137 |
+
| code_encoder_q.encoder.layer.7.output.LayerNorm.weight | [768] | 768 |
|
138 |
+
| code_encoder_q.encoder.layer.7.output.LayerNorm.bias | [768] | 768 |
|
139 |
+
| code_encoder_q.encoder.layer.8.attention.self.query.weight | [768, 768] | 589824 |
|
140 |
+
| code_encoder_q.encoder.layer.8.attention.self.query.bias | [768] | 768 |
|
141 |
+
| code_encoder_q.encoder.layer.8.attention.self.key.weight | [768, 768] | 589824 |
|
142 |
+
| code_encoder_q.encoder.layer.8.attention.self.key.bias | [768] | 768 |
|
143 |
+
| code_encoder_q.encoder.layer.8.attention.self.value.weight | [768, 768] | 589824 |
|
144 |
+
| code_encoder_q.encoder.layer.8.attention.self.value.bias | [768] | 768 |
|
145 |
+
| code_encoder_q.encoder.layer.8.attention.output.dense.weight | [768, 768] | 589824 |
|
146 |
+
| code_encoder_q.encoder.layer.8.attention.output.dense.bias | [768] | 768 |
|
147 |
+
| code_encoder_q.encoder.layer.8.attention.output.LayerNorm.weight | [768] | 768 |
|
148 |
+
| code_encoder_q.encoder.layer.8.attention.output.LayerNorm.bias | [768] | 768 |
|
149 |
+
| code_encoder_q.encoder.layer.8.intermediate.dense.weight | [3072, 768] | 2359296 |
|
150 |
+
| code_encoder_q.encoder.layer.8.intermediate.dense.bias | [3072] | 3072 |
|
151 |
+
| code_encoder_q.encoder.layer.8.output.dense.weight | [768, 3072] | 2359296 |
|
152 |
+
| code_encoder_q.encoder.layer.8.output.dense.bias | [768] | 768 |
|
153 |
+
| code_encoder_q.encoder.layer.8.output.LayerNorm.weight | [768] | 768 |
|
154 |
+
| code_encoder_q.encoder.layer.8.output.LayerNorm.bias | [768] | 768 |
|
155 |
+
| code_encoder_q.encoder.layer.9.attention.self.query.weight | [768, 768] | 589824 |
|
156 |
+
| code_encoder_q.encoder.layer.9.attention.self.query.bias | [768] | 768 |
|
157 |
+
| code_encoder_q.encoder.layer.9.attention.self.key.weight | [768, 768] | 589824 |
|
158 |
+
| code_encoder_q.encoder.layer.9.attention.self.key.bias | [768] | 768 |
|
159 |
+
| code_encoder_q.encoder.layer.9.attention.self.value.weight | [768, 768] | 589824 |
|
160 |
+
| code_encoder_q.encoder.layer.9.attention.self.value.bias | [768] | 768 |
|
161 |
+
| code_encoder_q.encoder.layer.9.attention.output.dense.weight | [768, 768] | 589824 |
|
162 |
+
| code_encoder_q.encoder.layer.9.attention.output.dense.bias | [768] | 768 |
|
163 |
+
| code_encoder_q.encoder.layer.9.attention.output.LayerNorm.weight | [768] | 768 |
|
164 |
+
| code_encoder_q.encoder.layer.9.attention.output.LayerNorm.bias | [768] | 768 |
|
165 |
+
| code_encoder_q.encoder.layer.9.intermediate.dense.weight | [3072, 768] | 2359296 |
|
166 |
+
| code_encoder_q.encoder.layer.9.intermediate.dense.bias | [3072] | 3072 |
|
167 |
+
| code_encoder_q.encoder.layer.9.output.dense.weight | [768, 3072] | 2359296 |
|
168 |
+
| code_encoder_q.encoder.layer.9.output.dense.bias | [768] | 768 |
|
169 |
+
| code_encoder_q.encoder.layer.9.output.LayerNorm.weight | [768] | 768 |
|
170 |
+
| code_encoder_q.encoder.layer.9.output.LayerNorm.bias | [768] | 768 |
|
171 |
+
| code_encoder_q.encoder.layer.10.attention.self.query.weight | [768, 768] | 589824 |
|
172 |
+
| code_encoder_q.encoder.layer.10.attention.self.query.bias | [768] | 768 |
|
173 |
+
| code_encoder_q.encoder.layer.10.attention.self.key.weight | [768, 768] | 589824 |
|
174 |
+
| code_encoder_q.encoder.layer.10.attention.self.key.bias | [768] | 768 |
|
175 |
+
| code_encoder_q.encoder.layer.10.attention.self.value.weight | [768, 768] | 589824 |
|
176 |
+
| code_encoder_q.encoder.layer.10.attention.self.value.bias | [768] | 768 |
|
177 |
+
| code_encoder_q.encoder.layer.10.attention.output.dense.weight | [768, 768] | 589824 |
|
178 |
+
| code_encoder_q.encoder.layer.10.attention.output.dense.bias | [768] | 768 |
|
179 |
+
| code_encoder_q.encoder.layer.10.attention.output.LayerNorm.weight | [768] | 768 |
|
180 |
+
| code_encoder_q.encoder.layer.10.attention.output.LayerNorm.bias | [768] | 768 |
|
181 |
+
| code_encoder_q.encoder.layer.10.intermediate.dense.weight | [3072, 768] | 2359296 |
|
182 |
+
| code_encoder_q.encoder.layer.10.intermediate.dense.bias | [3072] | 3072 |
|
183 |
+
| code_encoder_q.encoder.layer.10.output.dense.weight | [768, 3072] | 2359296 |
|
184 |
+
| code_encoder_q.encoder.layer.10.output.dense.bias | [768] | 768 |
|
185 |
+
| code_encoder_q.encoder.layer.10.output.LayerNorm.weight | [768] | 768 |
|
186 |
+
| code_encoder_q.encoder.layer.10.output.LayerNorm.bias | [768] | 768 |
|
187 |
+
| code_encoder_q.encoder.layer.11.attention.self.query.weight | [768, 768] | 589824 |
|
188 |
+
| code_encoder_q.encoder.layer.11.attention.self.query.bias | [768] | 768 |
|
189 |
+
| code_encoder_q.encoder.layer.11.attention.self.key.weight | [768, 768] | 589824 |
|
190 |
+
| code_encoder_q.encoder.layer.11.attention.self.key.bias | [768] | 768 |
|
191 |
+
| code_encoder_q.encoder.layer.11.attention.self.value.weight | [768, 768] | 589824 |
|
192 |
+
| code_encoder_q.encoder.layer.11.attention.self.value.bias | [768] | 768 |
|
193 |
+
| code_encoder_q.encoder.layer.11.attention.output.dense.weight | [768, 768] | 589824 |
|
194 |
+
| code_encoder_q.encoder.layer.11.attention.output.dense.bias | [768] | 768 |
|
195 |
+
| code_encoder_q.encoder.layer.11.attention.output.LayerNorm.weight | [768] | 768 |
|
196 |
+
| code_encoder_q.encoder.layer.11.attention.output.LayerNorm.bias | [768] | 768 |
|
197 |
+
| code_encoder_q.encoder.layer.11.intermediate.dense.weight | [3072, 768] | 2359296 |
|
198 |
+
| code_encoder_q.encoder.layer.11.intermediate.dense.bias | [3072] | 3072 |
|
199 |
+
| code_encoder_q.encoder.layer.11.output.dense.weight | [768, 3072] | 2359296 |
|
200 |
+
| code_encoder_q.encoder.layer.11.output.dense.bias | [768] | 768 |
|
201 |
+
| code_encoder_q.encoder.layer.11.output.LayerNorm.weight | [768] | 768 |
|
202 |
+
| code_encoder_q.encoder.layer.11.output.LayerNorm.bias | [768] | 768 |
|
203 |
+
| code_encoder_q.pooler.dense.weight | [768, 768] | 589824 |
|
204 |
+
| code_encoder_q.pooler.dense.bias | [768] | 768 |
|
205 |
+
+-------------------------------------------------------------------+--------------+----------+
|
206 |
+
02/17/2024 15:12:09 - INFO - __main__ - Training/evaluation parameters Namespace(agg_way='cls_pooler', aug_type_way='random_replace_type', code_length=64, codebase_file='dataset/ruby/codebase.jsonl', config_name='microsoft/unixcoder-base', couninue_pre_train_data_files=['dataset/java/train.jsonl', 'dataset/javascript/train.jsonl', 'dataset/python/train.jsonl', 'dataset/php/train.jsonl', 'dataset/go/train.jsonl', 'dataset/ruby/train.jsonl'], data_aug_type='other', data_flow_length=0, debug=False, device=device(type='cuda'), do_avg=False, do_continue_pre_trained=False, do_eval=False, do_fine_tune=False, do_ineer_loss=False, do_multi_lang_continue_pre_train=True, do_single_lang_continue_pre_train=False, do_test=True, do_train=False, do_whitening=False, do_zero_short=False, epoch=50, eval_batch_size=64, eval_data_file='dataset/ruby/valid.jsonl', eval_frequency=100, fp16=False, gradient_accumulation_steps=1, hidden_size=768, lang='ruby', learning_rate=2e-05, loaded_codebert_model_filename=None, loaded_model_filename=None, local_rank=-1, logging_steps=50, max_codeblock_num=10, max_grad_norm=1.0, max_steps=100000, mlm_probability=0.1, mlp=False, moco_dim=768, moco_k=1024, moco_m=0.999, moco_t=0.07, moco_type='encoder_queue', model_name_or_path='microsoft/unixcoder-base', model_type='multi-loss-cocosoda', n_debug_samples=100, n_gpu=2, nl_length=64, num_train_epochs=10, num_warmup_steps=0, only_save_the_nl_code_vec=False, output_dir='./saved_models/cocosoda/', print_align_unif_loss=False, save_evaluation_reuslt=False, save_evaluation_reuslt_dir=None, save_steps=1000, seed=123456, test_data_file='dataset/ruby/test.jsonl', time_score=1, tokenizer_name='microsoft/unixcoder-base', train_batch_size=128, train_data_file='dataset/ruby/train.jsonl', use_best_mrr_model=False, weight_decay=0.01)
|
207 |
+
02/17/2024 15:15:03 - INFO - __main__ - *** Example ***
|
208 |
+
02/17/2024 15:15:03 - INFO - __main__ - idx: 0
|
209 |
+
02/17/2024 15:15:03 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', '@', '_Override', '_public', '_Image', 'Source', '_apply', '_(', '_Image', 'Source', '_input', '_)', '_{', '_final', '_int', '_[', '_]', '_[', '_]', '_pixel', 'Matrix', '_=', '_new', '_int', '_[', '_3', '_]', '_[', '_3', '_]', '_;', '_int', '_w', '_=', '_input', '_.', '_getWidth', '_(', '_)', '_;', '_int', '_h', '_=', '_input', '_.', '_getHeight', '_(', '_)', '_;', '_int', '_[', '_]', '_[', '_]', '_output', '_=', '_new', '_int', '_[', '_h', '_]', '</s>']
|
210 |
+
02/17/2024 15:15:03 - INFO - __main__ - code_ids: 0 6 2 150 19505 1240 6085 1768 5230 400 6085 1768 1586 743 399 1920 554 626 2406 626 2406 5578 3679 385 579 554 626 995 2406 626 995 2406 2476 554 477 385 1586 746 32671 400 743 2476 554 566 385 1586 746 32720 400 743 2476 554 626 2406 626 2406 1721 385 579 554 626 566 2406 2
|
211 |
+
02/17/2024 15:15:03 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Expect', 's', '_a', '_height', '_mat', '_as', '_input', '</s>']
|
212 |
+
02/17/2024 15:15:03 - INFO - __main__ - nl_ids: 0 6 2 7871 201 434 3082 5772 880 1586 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
213 |
+
02/17/2024 15:15:03 - INFO - __main__ - *** Example ***
|
214 |
+
02/17/2024 15:15:03 - INFO - __main__ - idx: 1
|
215 |
+
02/17/2024 15:15:03 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'public', '_<', '_L', 'extends', 'Listener', '_>', '_void', '_pop', 'Event', '_(', '_Event', '_<', '_?', '_,', '_L', '_>', '_expected', '_)', '_{', '_synchronized', '_(', '_this', '_.', '_stack', '_)', '_{', '_final', '_Event', '_<', '_?', '_,', '_?', '_>', '_actual', '_=', '_this', '_.', '_stack', '_.', '_pop', '_(', '_)', '_;', '_if', '_(', '_actual', '_!=', '_expected', '_)', '_{', '_throw', '_new', '_IllegalStateException', '_(', '_String', '_.', '_format', '_(', '"', 'Un', '</s>']
|
216 |
+
02/17/2024 15:15:03 - INFO - __main__ - code_ids: 0 6 2 653 517 747 13125 2486 711 723 5012 1089 400 3916 517 999 2019 747 711 2048 743 399 9401 400 547 746 3325 743 399 1920 3916 517 999 2019 999 711 3780 385 547 746 3325 746 5012 400 743 2476 462 400 3780 620 2048 743 399 1185 579 16219 400 1167 746 2021 400 120 965 2
|
217 |
+
02/17/2024 15:15:03 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'P', 'ops', '_the', '_top', '_event', '_off', '_the', '_current', '_event', '_stack', '_.', '_This', '_action', '_has', '_to', '_be', '_performed', '_immediately', '_after', '_the', '_event', '_has', '_been', '_dispatched', '_to', '_all', '_listeners', '_.', '</s>']
|
218 |
+
02/17/2024 15:15:03 - INFO - __main__ - nl_ids: 0 6 2 166 2489 448 3194 1488 3413 448 1434 1488 3325 746 1600 2657 1559 508 661 13181 10086 2493 448 1488 1559 3022 43340 508 1345 11839 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
219 |
+
02/17/2024 15:15:03 - INFO - __main__ - *** Example ***
|
220 |
+
02/17/2024 15:15:03 - INFO - __main__ - idx: 2
|
221 |
+
02/17/2024 15:15:03 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'protected', '_void', '_modify', '_(', '_Transaction', '_t', '_)', '_{', '_try', '_{', '_this', '_.', '_lock', '_.', '_write', 'Lock', '_(', '_)', '_.', '_lock', '_(', '_)', '_;', '_t', '_.', '_perform', '_(', '_)', '_;', '_}', '_finally', '_{', '_this', '_.', '_lock', '_.', '_write', 'Lock', '_(', '_)', '_.', '_unlock', '_(', '_)', '_;', '_}', '_}', '</s>']
|
222 |
+
02/17/2024 15:15:03 - INFO - __main__ - code_ids: 0 6 2 1933 723 8660 400 13081 422 743 399 1568 399 547 746 3505 746 2250 2896 400 743 746 3505 400 743 2476 422 746 4729 400 743 2476 425 6110 399 547 746 3505 746 2250 2896 400 743 746 14552 400 743 2476 425 425 2 1 1 1 1 1 1 1 1 1 1 1 1 1
|
223 |
+
02/17/2024 15:15:03 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Executes', '_the', '_given', '_transaction', '_within', '_the', '_con', 'text', 'of', '_a', '_write', '_lock', '_.', '</s>']
|
224 |
+
02/17/2024 15:15:03 - INFO - __main__ - nl_ids: 0 6 2 40551 448 2076 4993 5289 448 549 625 757 434 2250 3505 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
225 |
+
02/17/2024 15:15:59 - INFO - __main__ - *** Example ***
|
226 |
+
02/17/2024 15:15:59 - INFO - __main__ - idx: 0
|
227 |
+
02/17/2024 15:15:59 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'function', '_(', '_state', '_,', '_action', '_)', '_{', '_return', '__', '_.', '_defaults', '_(', '_{', '_isValid', 'ating', '_:', '_action', '_.', '_isValid', 'ating', '_,', '_last', 'Action', '_:', '_IS', '_', 'VALID', 'ATING', '_}', '_,', '_state', '_)', '_}', '</s>']
|
228 |
+
02/17/2024 15:15:59 - INFO - __main__ - code_ids: 0 6 2 618 400 1404 2019 2657 743 399 483 623 746 7470 400 399 17002 2335 545 2657 746 17002 2335 2019 2023 1888 545 1947 181 7477 40173 425 2019 1404 743 425 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
229 |
+
02/17/2024 15:15:59 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Update', '_is', '_validating', '_result', '</s>']
|
230 |
+
02/17/2024 15:15:59 - INFO - __main__ - nl_ids: 0 6 2 2056 555 38924 1046 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
231 |
+
02/17/2024 15:15:59 - INFO - __main__ - *** Example ***
|
232 |
+
02/17/2024 15:15:59 - INFO - __main__ - idx: 1
|
233 |
+
02/17/2024 15:15:59 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'function', '_add', 'Widget', 'For', 'Filter', '_(', '_view', '_,', '_filter', '_,', '_edit', 'Mode', 'Hint', '_)', '_{', '_var', '_grid', 'ster', '_=', '_view', '_.', '__', 'widgets', 'Grid', 'ster', '_;', '_var', '_row', '_=', '_filter', '_.', '_row', '_||', '_1', '_;', '_var', '_col', '_=', '_filter', '_.', '_col', '_||', '_1', '_;', '_var', '_size', 'X', '_=', '_filter', '_.', '_size', '_', 'x', '_||', '_3', '_;', '_var', '_size', 'Y', '_=', '</s>']
|
234 |
+
02/17/2024 15:15:59 - INFO - __main__ - code_ids: 0 6 2 618 1103 3104 1459 2274 400 2859 2019 2866 2019 7277 1649 7641 743 399 660 6335 7400 385 2859 746 623 14718 3981 7400 2476 660 2562 385 2866 746 2562 853 524 2476 660 1253 385 2866 746 1253 853 524 2476 660 1014 174 385 2866 746 1014 181 206 853 995 2476 660 1014 175 385 2
|
235 |
+
02/17/2024 15:15:59 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Add', '_a', '_widget', '_to', '_the', '_analyze', '_page', '_for', '_the', '_given', '_filter', '</s>']
|
236 |
+
02/17/2024 15:15:59 - INFO - __main__ - nl_ids: 0 6 2 972 434 6949 508 448 25087 2303 563 448 2076 2866 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
237 |
+
02/17/2024 15:15:59 - INFO - __main__ - *** Example ***
|
238 |
+
02/17/2024 15:15:59 - INFO - __main__ - idx: 2
|
239 |
+
02/17/2024 15:15:59 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'function', '_in', 'Range', '_(', '_value', '_,', '_min', '_,', '_max', '_)', '_{', '_const', '_int', '_=', '_parseInt', '_(', '_value', '_,', '_10', '_)', '_return', '_(', '_`', '_${', '_int', '_}', '_`', '_===', '_`', '_${', '_value', '_.', '_replace', '_(', '_/', '_^', '0', '_/', '_,', "_''", '_)', '_}', '_`', '_&&', '_int', '_>=', '_min', '_&&', '_int', '_<=', '_max', '_)', '_}', '</s>']
|
240 |
+
02/17/2024 15:15:59 - INFO - __main__ - code_ids: 0 6 2 618 488 2228 400 767 2019 2069 2019 1621 743 399 925 554 385 9998 400 767 2019 1865 743 483 400 1222 5593 554 425 1222 1246 1222 5593 767 746 4126 400 1017 3855 134 1017 2019 3606 743 425 1222 698 554 1451 2069 698 554 1826 1621 743 425 2 1 1 1 1 1 1 1
|
241 |
+
02/17/2024 15:15:59 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Determine', '_if', '_value', '_is', '_within', '_a', '_numeric', '_range', '</s>']
|
242 |
+
02/17/2024 15:15:59 - INFO - __main__ - nl_ids: 0 6 2 17591 462 767 555 5289 434 10397 1780 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
243 |
+
02/17/2024 15:19:33 - INFO - __main__ - *** Example ***
|
244 |
+
02/17/2024 15:19:33 - INFO - __main__ - idx: 0
|
245 |
+
02/17/2024 15:19:33 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'def', '_split', '_', 'phy', 'log', 'en', 'y', '_(', '_p', '_,', '_level', '_=', '"', 's', '"', ')', '_:', '_level', '_=', '_level', '_+', '"', '__', '"', 'result', '_=', '_p', '_.', '_split', '_(', '_level', '_)', '_return', '_result', '_[', '_0', '_]', '_+', '_level', '_+', '_result', '_[', '_1', '_]', '_.', '_split', '_(', '"', ';', '"', ')', '_[', '_0', '_]', '</s>']
|
246 |
+
02/17/2024 15:19:33 - INFO - __main__ - code_ids: 0 6 2 729 5192 181 3258 896 386 207 400 428 2019 3144 385 120 201 120 127 545 3144 385 3144 513 120 876 120 1125 385 428 746 5192 400 3144 743 483 1046 626 461 2406 513 3144 513 1046 626 524 2406 746 5192 400 120 145 120 127 626 461 2406 2 1 1 1 1 1 1
|
247 |
+
02/17/2024 15:19:33 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Return', '_either', '_the', '_full', '_or', '_truncated', '_version', '_of', '_a', '_Q', 'II', 'ME', '_-', '_formatted', '_taxonomy', 'string', '.', '</s>']
|
248 |
+
02/17/2024 15:19:33 - INFO - __main__ - nl_ids: 0 6 2 1675 4759 448 3662 872 19307 2229 595 434 1152 4300 1098 581 10440 29021 571 132 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
249 |
+
02/17/2024 15:19:33 - INFO - __main__ - *** Example ***
|
250 |
+
02/17/2024 15:19:33 - INFO - __main__ - idx: 1
|
251 |
+
02/17/2024 15:19:33 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'def', '_ensure', '_', 'dir', '_(', '_d', '_)', '_:', '_if', '_not', '_os', '_.', '_path', '_.', '_exists', '_(', '_d', '_)', '_:', '_try', '_:', '_os', '_.', '_m', 'akedirs', '_(', '_d', '_)', '_except', '_OSError', '_as', '_oe', '_:', '_#', '_should', '_not', '_happen', '_with', '_os', '.', 'makedirs', '_#', '_ENOENT', ':', '_No', '_such', '_file', '_or', '_directory', '_if', '_os', '_.', '_errno', '_==', '_errno', '_.', '_ENOENT', '_:', '_msg', '_=', '</s>']
|
252 |
+
02/17/2024 15:19:33 - INFO - __main__ - code_ids: 0 6 2 729 6229 181 1282 400 480 743 545 462 800 2215 746 1391 746 4534 400 480 743 545 1568 545 2215 746 446 23328 400 480 743 3552 22934 880 44902 545 830 1570 800 7564 918 2215 132 24429 830 41059 144 4038 5632 1012 872 3456 462 2215 746 2341 550 2341 746 41059 545 2345 385 2
|
253 |
+
02/17/2024 15:19:33 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Check', '_to', '_make', '_sure', '_the', '_supplied', '_directory', '_path', '_does', '_not', '_exist', '_if', '_so', '_create', '_it', '_.', '_The', '_method', '_catch', 'es', '_OSError', '_exceptions', '_and', '_returns', '_a', '_desc', 'riptive', '_message', '_instead', '_of', '_re', '_-', '_raising', '_the', '_error', '_.', '</s>']
|
254 |
+
02/17/2024 15:19:33 - INFO - __main__ - nl_ids: 0 6 2 1749 508 2002 3984 448 8813 3456 1391 2129 800 3040 462 1769 1738 835 746 1044 1454 2092 482 22934 12300 706 2060 434 2162 44105 1841 4488 595 479 581 47183 448 843 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
255 |
+
02/17/2024 15:19:33 - INFO - __main__ - *** Example ***
|
256 |
+
02/17/2024 15:19:33 - INFO - __main__ - idx: 2
|
257 |
+
02/17/2024 15:19:33 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'def', '_file', '_', 'handle', '_(', '_fn', 'h', '_,', '_mode', '_=', '"', 'r', 'U', '"', ')', '_:', '_handle', '_=', '_None', '_if', '_isinstance', '_(', '_fn', 'h', '_,', '_file', '_)', '_:', '_if', '_fn', 'h', '_.', '_closed', '_:', '_raise', '_ValueError', '_(', '"', 'Input', '_file', '_is', '_closed', '.', '"', ')', '_handle', '_=', '_fn', 'h', '_elif', '_isinstance', '_(', '_fn', 'h', '_,', '_str', '_)', '_:', '_handle', '_=', '</s>']
|
258 |
+
02/17/2024 15:19:33 - INFO - __main__ - code_ids: 0 6 2 729 1012 181 2133 400 4065 190 2019 2119 385 120 200 171 120 127 545 2384 385 1938 462 5408 400 4065 190 2019 1012 743 545 462 4065 190 746 8264 545 3085 6052 400 120 1834 1012 555 8264 132 120 127 2384 385 4065 190 3625 5408 400 4065 190 2019 1113 743 545 2384 385 2
|
259 |
+
02/17/2024 15:19:33 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Takes', '_either', '_a', '_file', '_path', '_or', '_an', '_open', '_file', '_handle', '_checks', '_validity', '_and', '_returns', '_an', '_open', '_file', '_handle', '_or', '_raises', '_an', '_appropriate', '_Exception', '_.', '</s>']
|
260 |
+
02/17/2024 15:19:33 - INFO - __main__ - nl_ids: 0 6 2 27408 4759 434 1012 1391 872 817 2717 1012 2384 7825 25911 706 2060 817 2717 1012 2384 872 23154 817 7900 2654 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
261 |
+
02/17/2024 15:23:02 - INFO - __main__ - *** Example ***
|
262 |
+
02/17/2024 15:23:02 - INFO - __main__ - idx: 0
|
263 |
+
02/17/2024 15:23:02 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'public', '_function', '_on', 'Channel', 'Pre', 'Delete', '_(', '_Resource', 'Controller', 'Event', '_$', '_event', '_)', '_:', '_void', '_{', '_$', '_channel', '_=', '_$', '_event', '_->', '_get', 'Subject', '_(', '_)', '_;', '_if', '_(', '_!', '_$', '_channel', '_instanceof', '_Channel', 'Interface', '_)', '_{', '_throw', '_new', '_Unexpected', 'TypeException', '_(', '_$', '_channel', '_,', '_Channel', 'Interface', '_::', 'class', ')', '_;', '_}', '_$', '_results', '_=', '_$', '_this', '_->', '_channel', 'Repository', '</s>']
|
264 |
+
02/17/2024 15:23:02 - INFO - __main__ - code_ids: 0 6 2 653 603 854 3267 1782 2843 400 7606 3357 1089 440 1488 743 545 723 399 440 3225 385 440 1488 1703 744 7562 400 743 2476 462 400 552 440 3225 3052 11322 2285 743 399 1185 579 23297 48098 400 440 3225 2019 11322 2285 5431 1149 127 2476 425 440 3286 385 440 547 1703 3225 5674 2
|
265 |
+
02/17/2024 15:23:02 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Prevent', '_channel', '_deletion', '_if', '_no', '_more', '_channels', '_enabled', '_.', '</s>']
|
266 |
+
02/17/2024 15:23:02 - INFO - __main__ - nl_ids: 0 6 2 42669 3225 19744 462 1375 2726 8630 5334 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
267 |
+
02/17/2024 15:23:02 - INFO - __main__ - *** Example ***
|
268 |
+
02/17/2024 15:23:02 - INFO - __main__ - idx: 1
|
269 |
+
02/17/2024 15:23:02 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'public', '_function', '_get', 'Tax', 'Total', '_(', '_)', '_:', '_int', '_{', '_$', '_tax', 'Total', '_=', '_0', '_;', '_foreach', '_(', '_$', '_this', '_->', '_get', 'Adjustments', '_(', '_Adjust', 'ment', 'Interface', '_::', '_T', 'AX', '_', 'ADJUST', 'MENT', '_)', '_as', '_$', '_tax', 'Adjustment', '_)', '_{', '_$', '_tax', 'Total', '_+=', '_$', '_tax', 'Adjustment', '_->', '_get', 'Amount', '_(', '_)', '_;', '_}', '_foreach', '_(', '_$', '_this', '_->', '_units', '</s>']
|
270 |
+
02/17/2024 15:23:02 - INFO - __main__ - code_ids: 0 6 2 653 603 744 11266 4703 400 743 545 554 399 440 14990 4703 385 461 2476 2315 400 440 547 1703 744 39930 400 16203 564 2285 5431 515 3383 181 44094 4332 743 880 440 14990 21585 743 399 440 14990 4703 1054 440 14990 21585 1703 744 6933 400 743 2476 425 2315 400 440 547 1703 10931 2
|
271 |
+
02/17/2024 15:23:02 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Returns', '_sum', '_of', '_ne', 'utral', '_and', '_non', '_ne', 'utral', '_tax', '_adjust', 'ments', '_on', '_order', '_item', '_and', '_total', '_tax', '_of', '_units', '_.', '</s>']
|
272 |
+
02/17/2024 15:23:02 - INFO - __main__ - nl_ids: 0 6 2 2853 3863 595 1472 22943 706 2514 1472 22943 14990 7780 2067 854 2991 1573 706 3704 14990 595 10931 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
273 |
+
02/17/2024 15:23:02 - INFO - __main__ - *** Example ***
|
274 |
+
02/17/2024 15:23:02 - INFO - __main__ - idx: 2
|
275 |
+
02/17/2024 15:23:02 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'private', '_function', '_is', 'Last', 'Enabled', 'Entity', '_(', '_$', '_result', '_,', '_$', '_entity', '_)', '_:', '_bool', '_{', '_return', '_!', '_$', '_result', '_||', '_0', '_===', '_count', '_(', '_$', '_result', '_)', '_||', '_(', '_1', '_===', '_count', '_(', '_$', '_result', '_)', '_&&', '_$', '_entity', '_===', '_(', '_$', '_result', '_instanceof', '_\\', '_Iterator', '_?', '_$', '_result', '_->', '_current', '_(', '_)', '_:', '_current', '_(', '_$', '_result', '_)', '</s>']
|
276 |
+
02/17/2024 15:23:02 - INFO - __main__ - code_ids: 0 6 2 1335 603 555 2954 3060 2268 400 440 1046 2019 440 4498 743 545 1223 399 483 552 440 1046 853 461 1246 1752 400 440 1046 743 853 400 524 1246 1752 400 440 1046 743 698 440 4498 1246 400 440 1046 3052 1216 13119 999 440 1046 1703 1434 400 743 545 1434 400 440 1046 743 2
|
277 |
+
02/17/2024 15:23:02 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'If', '_no', '_entity', '_matched', '_the', '_query', '_criteria', '_or', '_a', '_single', '_entity', '_matched', '_which', '_is', '_the', '_same', '_as', '_the', '_entity', '_being', '_validated', '_the', '_entity', '_is', '_the', '_last', '_enabled', '_entity', '_available', '_.', '</s>']
|
278 |
+
02/17/2024 15:23:02 - INFO - __main__ - nl_ids: 0 6 2 2815 1375 4498 5865 448 2616 14677 872 434 3501 4498 5865 1839 555 448 2641 880 448 4498 4251 20709 448 4498 555 448 2023 5334 4498 3777 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
279 |
+
02/17/2024 15:25:32 - INFO - __main__ - *** Example ***
|
280 |
+
02/17/2024 15:25:32 - INFO - __main__ - idx: 0
|
281 |
+
02/17/2024 15:25:32 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'func', '_getAll', 'Dep', 'Types', '_(', '_)', '_[', '_]', 'string', '{', '_dep', 'Types', '_:=', '_make', '_(', '_[', '_]', 'string', ',', '_0', '_,', '_len', '_(', '_cmds', '_)', '_)', '_Ċ', '_for', '_dep', 'Type', '_:=', '_range', '_cmds', '_{', '_dep', 'Types', '_=', '_append', '_(', '_dep', 'Types', '_,', '_dep', 'Type', '_)', '_Ċ', '_}', '_Ċ', '_sort', '_.', '_Strings', '_(', '_dep', 'Types', '_)', '_Ċ', '_return', '_dep', 'Types', '_Ċ', '</s>']
|
282 |
+
02/17/2024 15:25:32 - INFO - __main__ - code_ids: 0 6 2 763 21556 15010 2531 400 743 626 2406 571 209 13994 2531 716 2002 400 626 2406 571 130 461 2019 1015 400 22803 743 743 1022 563 13994 641 716 1780 22803 399 13994 2531 385 2746 400 13994 2531 2019 13994 641 743 1022 425 1022 4821 746 23012 400 13994 2531 743 1022 483 13994 2531 1022 2
|
283 |
+
02/17/2024 15:25:32 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'getAll', 'Dep', 'Types', '_returns', '_a', '_sorted', '_list', '_of', 'name', 's', '_of', '_all', '_dep', '_type', '_commands', '_.', '</s>']
|
284 |
+
02/17/2024 15:25:32 - INFO - __main__ - nl_ids: 0 6 2 12199 15010 2531 2060 434 6977 1182 595 616 201 595 1345 13994 889 7997 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
285 |
+
02/17/2024 15:25:32 - INFO - __main__ - *** Example ***
|
286 |
+
02/17/2024 15:25:32 - INFO - __main__ - idx: 1
|
287 |
+
02/17/2024 15:25:32 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'func', '_get', 'Io', 'Progress', 'Reader', '_(', '_label', 'string', ',', '_res', '_*', '_http', '_.', '_Response', '_)', '_io', '_.', '_Reader', '_{', '_prefix', '_:=', '"', '"', '+', '_label', '_Ċ', '_fmt', 'Bytes', 'Size', '_:=', '_18', '_Ċ', '_bar', 'Size', '_:=', '_int', '64', '_(', '_80', '_-', '_len', '_(', '_prefix', '_)', '_-', '_fmt', 'Bytes', 'Size', '_)', '_Ċ', '_bar', '_:=', '_i', 'opro', 'gress', '_.', '_Draw', 'Text', 'Format', 'Bar', '</s>']
|
288 |
+
02/17/2024 15:25:32 - INFO - __main__ - code_ids: 0 6 2 763 744 8499 4909 2692 400 2649 571 130 705 426 2014 746 6397 743 3095 746 15471 399 3603 716 120 120 129 2649 1022 2771 2240 939 716 7837 1022 5252 939 716 554 848 400 8967 581 1015 400 3603 743 581 2771 2240 939 743 1022 5252 716 548 31375 2639 746 8548 1072 1660 3238 2
|
289 |
+
02/17/2024 15:25:32 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'get', 'Io', 'Progress', 'Reader', '_returns', '_a', '_reader', '_that', '_wraps', '_the', '_HTTP', '_response', '_body', '_so', '_it', '_prints', '_a', '_pretty', '_progress', '_bar', '_when', '_reading', '_data', '_from', '_it', '_.', '</s>']
|
290 |
+
02/17/2024 15:25:32 - INFO - __main__ - nl_ids: 0 6 2 459 8499 4909 2692 2060 434 4636 922 28232 448 4383 1925 3444 1769 835 22199 434 15344 6687 5252 1672 8267 869 1029 835 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
291 |
+
02/17/2024 15:25:32 - INFO - __main__ - *** Example ***
|
292 |
+
02/17/2024 15:25:32 - INFO - __main__ - idx: 2
|
293 |
+
02/17/2024 15:25:32 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'func', '_(', '_f', '_*', '_remove', 'OnClose', '_)', '_Close', '_(', '_)', '_error', '_{', '_if', '_f', '_==', '_nil', '_||', '_f', '_.', '_File', '_==', '_nil', '_{', '_return', '_nil', '_Ċ', '_}', 'name', ':', '=', '_f', '_.', '_File', '_.', '_Name', '_(', '_)', '_Ċ', '_if', '_err', '_:=', '_f', '_.', '_File', '_.', '_Close', '_(', '_)', '_;', '_err', '_!=', '_nil', '_{', '_return', '_err', '_Ċ', '_}', '_Ċ', '_if', '_err', '</s>']
|
294 |
+
02/17/2024 15:25:32 - INFO - __main__ - code_ids: 0 6 2 763 400 412 426 3033 45359 743 5832 400 743 843 399 462 412 550 845 853 412 746 2536 550 845 399 483 845 1022 425 616 144 147 412 746 2536 746 3725 400 743 1022 462 573 716 412 746 2536 746 5832 400 743 2476 573 620 845 399 483 573 1022 425 1022 462 573 2
|
295 |
+
02/17/2024 15:25:32 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Close', '_closes', '_the', '_file', '_and', '_then', '_removes', '_it', '_from', '_disk', '_.', '_No', '_error', '_is', '_returned', '_if', '_the', '_file', '_did', '_not', '_exist', '_at', '_the', '_point', '_of', '_removal', '_.', '</s>']
|
296 |
+
02/17/2024 15:25:32 - INFO - __main__ - nl_ids: 0 6 2 3108 19735 448 1012 706 2270 15719 835 1029 8236 746 4038 843 555 2862 462 448 1012 6088 800 3040 1035 448 1704 595 23066 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
297 |
+
02/17/2024 15:25:48 - INFO - __main__ - *** Example ***
|
298 |
+
02/17/2024 15:25:48 - INFO - __main__ - idx: 0
|
299 |
+
02/17/2024 15:25:48 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'def', '_render', '_', 'body', '_(', '_con', 'text', ',', '_options', '_)', '_if', '_options', '_.', '_key', '?', '_(', '_:', 'partial', '_)', '_[', '_render', '_', 'partial', '_(', '_con', 'text', ',', '_options', '_)', '_]', '_else', '_Streaming', 'Template', 'Renderer', '_.', '_new', '_(', '_@', 'lookup', '_', 'con', 'text', ')', '_.', '_render', '_(', '_con', 'text', ',', '_options', '_)', '_end', '_end', '</s>']
|
300 |
+
02/17/2024 15:25:48 - INFO - __main__ - code_ids: 0 6 2 729 4342 181 1995 400 549 625 130 1466 743 462 1466 746 1129 149 400 545 7609 743 626 4342 181 7609 400 549 625 130 1466 743 2406 669 47128 3057 6412 746 579 400 890 4961 181 525 625 127 746 4342 400 549 625 130 1466 743 1013 1013 2 1 1 1 1 1 1 1
|
301 |
+
02/17/2024 15:25:48 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Render', '_but', '_returns', '_a', '_valid', '_R', 'ack', '_body', '_.', '_If', '_fib', 'ers', '_are', '_defined', '_we', '_return', '_a', '_streaming', '_body', '_that', '_renders', '_the', '_template', '_piece', '_by', '_piece', '_.', '</s>']
|
302 |
+
02/17/2024 15:25:48 - INFO - __main__ - nl_ids: 0 6 2 3726 2107 2060 434 1976 821 598 3444 746 1359 24766 560 1147 3474 937 483 434 22676 3444 922 40840 448 3636 18781 1243 18781 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
303 |
+
02/17/2024 15:25:48 - INFO - __main__ - *** Example ***
|
304 |
+
02/17/2024 15:25:48 - INFO - __main__ - idx: 1
|
305 |
+
02/17/2024 15:25:48 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'def', '_attribute', '_', 'missing', '_(', '_match', '_,', '_*', '_args', '_,', '_&', '_block', '_)', '___', 'send', '__', '_(', '_match', '_.', '_target', '_,', '_match', '_.', '_attr', '_', 'name', ',', '_args', '_,', '_block', '_)', '_end', '</s>']
|
306 |
+
02/17/2024 15:25:48 - INFO - __main__ - code_ids: 0 6 2 729 2416 181 8487 400 1655 2019 426 1822 2019 519 1818 743 1267 2414 876 400 1655 746 1744 2019 1655 746 3526 181 616 130 1822 2019 1818 743 1013 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
307 |
+
02/17/2024 15:25:48 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', '+', '_attribute', '_', 'missing', '_+', '_is', '_like', '_+', '_method', '_', 'missing', '_+', '_but', '_for', '_attributes', '_.', '_When', '_+', '_method', '_', 'missing', '_+', '_is', '_called', '_we', '_check', '_to', '_see', '_if', '_there', '_is', '_a', '_matching', '_attribute', '_method', '_.', '_If', '_so', '_we', '_tell', '_+', '_attribute', '_', 'missing', '_+', '_to', '_dispatch', '_the', '_attribute', '_.', '_This', '_method', '_can', '_be', '_overloaded', '_to', '_customize', '_the', '_behavior', '_.', '</s>']
|
308 |
+
02/17/2024 15:25:48 - INFO - __main__ - nl_ids: 0 6 2 129 2416 181 8487 513 555 4401 513 1454 181 8487 513 2107 563 4402 746 5919 513 1454 181 8487 513 555 2953 937 1382 508 3986 462 2550 555 434 6506 2416 1454 746 1359 1769 937 11931 513 2416 181 8487 513 508 9363 448 2416 746 1600 1454 1347 661 45869 508 36145 448 9050 746 2
|
309 |
+
02/17/2024 15:25:48 - INFO - __main__ - *** Example ***
|
310 |
+
02/17/2024 15:25:48 - INFO - __main__ - idx: 2
|
311 |
+
02/17/2024 15:25:48 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'def', '_matched', '_', 'attribute', '_', 'method', '_(', '_method', '_', 'name', ')', '_matches', '_=', '_self', '_.', 'class', '.', '_send', '_(', '_:', 'attribute', '_', 'method', '_', 'matchers', '_', 'matching', '_,', '_method', '_', 'name', ')', '_matches', '_.', '_detect', '_{', '_|', '_match', '_|', '_attribute', '_', 'method', '?', '_(', '_match', '_.', '_attr', '_', 'name', ')', '_}', '_end', '</s>']
|
312 |
+
02/17/2024 15:25:48 - INFO - __main__ - code_ids: 0 6 2 729 5865 181 2163 181 1521 400 1454 181 616 127 5288 385 1358 746 1149 132 2904 400 545 2163 181 1521 181 38734 181 13575 2019 1454 181 616 127 5288 746 10241 399 649 1655 649 2416 181 1521 149 400 1655 746 3526 181 616 127 425 1013 2 1 1 1 1 1 1 1 1
|
313 |
+
02/17/2024 15:25:48 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Returns', '_a', '_struct', '_representing', '_the', '_matching', '_attribute', '_method', '_.', '_The', '_struct', '_s', '_attributes', '_are', '_prefix', '_base', '_and', '_suffix', '_.', '</s>']
|
314 |
+
02/17/2024 15:25:48 - INFO - __main__ - nl_ids: 0 6 2 2853 434 1277 8466 448 6506 2416 1454 746 1044 1277 431 4402 1147 3603 1712 706 8436 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
315 |
+
02/17/2024 15:25:48 - INFO - __main__ - ***** Running training *****
|
316 |
+
02/17/2024 15:25:48 - INFO - __main__ - Num examples = 908224
|
317 |
+
02/17/2024 15:25:48 - INFO - __main__ - Num Epochs = 10
|
318 |
+
02/17/2024 15:25:48 - INFO - __main__ - Num quene = 1024
|
319 |
+
02/17/2024 15:25:48 - INFO - __main__ - Instantaneous batch size per GPU = 64
|
320 |
+
02/17/2024 15:25:48 - INFO - __main__ - Total train batch size = 128
|
321 |
+
Traceback (most recent call last):
|
322 |
+
File "run.py", line 1200, in <module>
|
323 |
+
main()
|
324 |
+
File "run.py", line 1160, in main
|
325 |
+
multi_lang_continue_pre_train(args, model, tokenizer, pool)
|
326 |
+
File "run.py", line 756, in multi_lang_continue_pre_train
|
327 |
+
nl_q=nl_inputs , nl_k=nl_transformations_ids )
|
328 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
329 |
+
return forward_call(*input, **kwargs)
|
330 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 168, in forward
|
331 |
+
outputs = self.parallel_apply(replicas, inputs, kwargs)
|
332 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 178, in parallel_apply
|
333 |
+
return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
|
334 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/parallel/parallel_apply.py", line 86, in parallel_apply
|
335 |
+
output.reraise()
|
336 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/_utils.py", line 434, in reraise
|
337 |
+
raise exception
|
338 |
+
UnboundLocalError: Caught UnboundLocalError in replica 0 on device 0.
|
339 |
+
Original Traceback (most recent call last):
|
340 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/parallel/parallel_apply.py", line 61, in _worker
|
341 |
+
output = module(*input, **kwargs)
|
342 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
343 |
+
return forward_call(*input, **kwargs)
|
344 |
+
File "/home/yiming/cocosoda/CoCoSoDa/model.py", line 235, in forward
|
345 |
+
code_q = torch.nn.functional.normalize(code_q, p=2, dim=1)
|
346 |
+
UnboundLocalError: local variable 'code_q' referenced before assignment
|
347 |
+
|
saved_models/codesearch_contrastive_learning/Model/Epoch_1/base_dvi.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1396d58628aa89cf55b533e8c813f633582c8f2b77c210189bfcf355005fcc28
|
3 |
+
size 9506759
|
saved_models/codesearch_contrastive_learning/Model/Epoch_1/bgimg.png
ADDED
saved_models/codesearch_contrastive_learning/Model/Epoch_1/embedding.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:085bb5f7efe704f484da4f7603c28b58486e62822fbd89e145a2084e3d86f437
|
3 |
+
size 199544
|
saved_models/codesearch_contrastive_learning/Model/Epoch_1/index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
saved_models/codesearch_contrastive_learning/Model/Epoch_1/scale.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1e3a9ad91c1421ffc446780a7cd7227fca32e1d19cd04388819496d7cfea4d1
|
3 |
+
size 144
|
saved_models/codesearch_contrastive_learning/Model/Epoch_1/subject_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e507b62594fe1d7cebbabf653d559acb9e9a72f9d31b57ea53721bb52e26d228
|
3 |
+
size 1524384769
|
saved_models/codesearch_contrastive_learning/Model/Epoch_1/test_data.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:206096f239b771e8d12a88ac196562ac6a760fd050bbd1abec8e3f8c42c9da8a
|
3 |
+
size 3873920
|
saved_models/codesearch_contrastive_learning/Model/Epoch_1/train_data.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f108ee34bab6336566ee678a151b507bc739f608d59c9c043a2ad60148ac6002
|
3 |
+
size 76575872
|
saved_models/codesearch_contrastive_learning/Model/Epoch_2/base_dvi.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4539a62bfb97589fc21289309bca1128a3b2e57310a5f041e9ee8f5a5c438eb0
|
3 |
+
size 9506759
|
saved_models/codesearch_contrastive_learning/Model/Epoch_2/bgimg.png
ADDED
saved_models/codesearch_contrastive_learning/Model/Epoch_2/embedding.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3664112c7d7b3ec5263c4ae0400bf3cc9229c5337cf5ce7e11c63180a07c43e
|
3 |
+
size 199544
|
saved_models/codesearch_contrastive_learning/Model/Epoch_2/index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
saved_models/codesearch_contrastive_learning/Model/Epoch_2/scale.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1341dfba85e04a8a7bbbc37f743f425a19dd6b2bc436329197fe93581fbe41c9
|
3 |
+
size 144
|
saved_models/codesearch_contrastive_learning/Model/Epoch_2/subject_model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e507b62594fe1d7cebbabf653d559acb9e9a72f9d31b57ea53721bb52e26d228
|
3 |
+
size 1524384769
|
saved_models/codesearch_contrastive_learning/Model/Epoch_2/test_data.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5b836191dbb4c7a6ed3e9a762a859a9bd3f4ec6eefc9f5fb31adcc2bea76d88
|
3 |
+
size 3873920
|
saved_models/codesearch_contrastive_learning/Model/Epoch_2/train_data.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15a96c83f0154fe93e57987c99b3ad060558e85bb33bcd347cf195c1280cd6bd
|
3 |
+
size 76575872
|
saved_models/codesearch_contrastive_learning/Model/__pycache__/model.cpython-37.pyc
ADDED
Binary file (8.81 kB). View file
|
|
saved_models/codesearch_contrastive_learning/Model/__pycache__/model.cpython-38.pyc
ADDED
Binary file (8.48 kB). View file
|
|
saved_models/codesearch_contrastive_learning/Model/model-cs.py
ADDED
@@ -0,0 +1,396 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
from prettytable import PrettyTable
|
4 |
+
from torch.nn.modules.activation import Tanh
|
5 |
+
import copy
|
6 |
+
import logging
|
7 |
+
logger = logging.getLogger(__name__)
|
8 |
+
from transformers import (WEIGHTS_NAME, AdamW, get_linear_schedule_with_warmup,
|
9 |
+
RobertaConfig, RobertaModel, RobertaTokenizer)
|
10 |
+
def whitening_torch_final(embeddings):
    """Whiten a batch of embeddings so they are zero-mean with (approximately)
    identity second-moment matrix.

    Args:
        embeddings: 2-D tensor of shape [n, dim].

    Returns:
        Tensor of shape [n, dim]: the centered embeddings projected through
        U * S^{-1/2} from the SVD of the (unnormalized) covariance.
        Note: assumes the covariance is full-rank; a zero singular value
        would produce inf/nan here, same as the original.
    """
    # Column-wise mean, kept 2-D so broadcasting subtracts it row by row.
    mean = torch.mean(embeddings, dim=0, keepdim=True)
    centered = embeddings - mean
    # Unnormalized covariance: (X - mu)^T (X - mu), no division by n.
    covariance = torch.mm(centered.t(), centered)
    u, s, vt = torch.svd(covariance)
    # Whitening matrix W = U * diag(1 / sqrt(s)).
    whitener = torch.mm(u, torch.diag(1 / torch.sqrt(s)))
    return torch.mm(centered, whitener)
|
17 |
+
|
18 |
+
class BaseModel(nn.Module):
    """Shared base class: plain nn.Module plus a parameter-summary helper."""

    def __init__(self, ):
        super().__init__()

    def model_parameters(self):
        """Build a PrettyTable listing every trainable parameter.

        Returns:
            PrettyTable with columns "Layer Name", "Output Shape", "Param #",
            one row per parameter with requires_grad=True.
        """
        summary = PrettyTable()
        summary.field_names = ["Layer Name", "Output Shape", "Param #"]
        summary.align["Layer Name"] = "l"
        summary.align["Output Shape"] = "r"
        summary.align["Param #"] = "r"
        for param_name, tensor in self.named_parameters():
            if not tensor.requires_grad:
                continue
            summary.add_row([param_name, str(list(tensor.shape)), tensor.numel()])
        return summary


class Model(BaseModel):
    """Single-encoder bi-encoder: mean-pools the encoder's token states over
    non-padding positions and L2-normalizes the result."""

    def __init__(self, encoder):
        super(Model, self).__init__()
        self.encoder = encoder

    def forward(self, code_inputs=None, nl_inputs=None):
        """Encode either code or natural-language token ids.

        Args:
            code_inputs: [bs, seq] token ids, or None.
            nl_inputs: [bs, seq] token ids, used only when code_inputs is None.

        Returns:
            [bs, dim] L2-normalized mean-pooled embeddings.
        """
        # Exactly one of the two inputs is expected; code takes precedence,
        # matching the original branch order.
        inputs = code_inputs if code_inputs is not None else nl_inputs
        # Token id 1 marks padding here, so ne(1) is the attention mask.
        mask = inputs.ne(1)
        hidden = self.encoder(inputs, attention_mask=mask)[0]  # [bs, seq, dim]
        # Indexing with None inserts a new axis so the [bs, seq] mask
        # broadcasts against the [bs, seq, dim] hidden states; the division
        # turns the masked sum into a mean over real tokens.
        pooled = (hidden * mask[:, :, None]).sum(1) / mask.sum(-1)[:, None]
        return torch.nn.functional.normalize(pooled, p=2, dim=1)
|
47 |
+
|
48 |
+
|
49 |
+
class Multi_Loss_CoCoSoDa(BaseModel):
    """MoCo-style multi-loss contrastive model over code and natural language.

    Keeps momentum-updated key encoders and four ring-buffer queues of
    negative samples (code, masked code, NL, masked NL). Token id 1 is
    treated as padding throughout.

    Fixes vs. the original:
      * ``cls_pooler`` aggregation normalized the *unbound* names
        ``code_q``/``nl_q``/``code_k``/``nl_k`` instead of the pooler outputs
        just computed (NameError at runtime); they are now assigned properly.
      * ``_dequeue_and_enqueue`` raised ``exit(111)`` from a bare ``except``;
        it now raises an informative RuntimeError instead of killing the
        process.
    """

    def __init__(self, base_encoder, args, mlp=False):
        super(Multi_Loss_CoCoSoDa, self).__init__()

        self.K = args.moco_k   # queue size (number of stored negatives)
        self.m = args.moco_m   # momentum coefficient for key encoders
        self.T = args.moco_t   # softmax temperature
        dim = args.moco_dim    # embedding dimensionality

        # Query encoders share weights (same module serves code and NL);
        # key encoders are deep copies updated only by momentum.
        self.code_encoder_q = base_encoder
        self.code_encoder_k = copy.deepcopy(base_encoder)
        self.nl_encoder_q = base_encoder
        self.nl_encoder_k = copy.deepcopy(self.nl_encoder_q)
        # NOTE(review): mlp=True is accepted, but the *_fc projection heads
        # referenced in _momentum_update_key_encoder are never created here,
        # so mlp=True would raise AttributeError — confirm intent.
        self.mlp = mlp
        self.time_score = args.time_score
        self.do_whitening = args.do_whitening   # stored but unused in forward
        self.do_ineer_loss = args.do_ineer_loss
        self.agg_way = args.agg_way
        self.args = args

        # Initialize key encoders from the query encoders and freeze them.
        for param_q, param_k in zip(self.code_encoder_q.parameters(),
                                    self.code_encoder_k.parameters()):
            param_k.data.copy_(param_q.data)  # initialize
            param_k.requires_grad = False     # not updated by gradient
        for param_q, param_k in zip(self.nl_encoder_q.parameters(),
                                    self.nl_encoder_k.parameters()):
            param_k.data.copy_(param_q.data)
            param_k.requires_grad = False

        # Negative-sample queues, stored column-wise (dim x K) and
        # L2-normalized along the feature dimension, each with a ring pointer.
        torch.manual_seed(3047)
        torch.cuda.manual_seed(3047)
        for queue_name in ("code_queue", "masked_code_queue",
                           "nl_queue", "masked_nl_queue"):
            self.register_buffer(queue_name, torch.randn(dim, self.K))
            setattr(self, queue_name,
                    nn.functional.normalize(getattr(self, queue_name), dim=0))
            self.register_buffer(queue_name + "_ptr",
                                 torch.zeros(1, dtype=torch.long))

    @torch.no_grad()
    def _momentum_update_key_encoder(self):
        """Momentum update of both key encoders from the query encoders."""
        for param_q, param_k in zip(self.code_encoder_q.parameters(),
                                    self.code_encoder_k.parameters()):
            param_k.data = param_k.data * self.m + param_q.data * (1. - self.m)
        for param_q, param_k in zip(self.nl_encoder_q.parameters(),
                                    self.nl_encoder_k.parameters()):
            param_k.data = param_k.data * self.m + param_q.data * (1. - self.m)
        if self.mlp:
            # NOTE(review): these projection heads are never defined in
            # __init__; this branch cannot run without them. TODO confirm.
            for param_q, param_k in zip(self.code_encoder_q_fc.parameters(),
                                        self.code_encoder_k_fc.parameters()):
                param_k.data = param_k.data * self.m + param_q.data * (1. - self.m)
            for param_q, param_k in zip(self.nl_encoder_q_fc.parameters(),
                                        self.nl_encoder_k_fc.parameters()):
                param_k.data = param_k.data * self.m + param_q.data * (1. - self.m)

    @torch.no_grad()
    def _dequeue_and_enqueue(self, keys, option='code'):
        """Overwrite the oldest batch in the queue selected by ``option``.

        Args:
            keys: [bs, dim] normalized embeddings to enqueue.
            option: one of 'code', 'masked_code', 'nl', 'masked_nl'.

        Raises:
            RuntimeError: on a shape mismatch while writing into the queue
                (the original printed debug info and called exit(111)).
        """
        queue = getattr(self, option + "_queue")
        ptr_buf = getattr(self, option + "_queue_ptr")
        batch_size = keys.shape[0]
        assert self.K % batch_size == 0  # for simplicity
        ptr = int(ptr_buf)
        try:
            queue[:, ptr:ptr + batch_size] = keys.T
        except RuntimeError as exc:
            raise RuntimeError(
                "enqueue failed for '{}': ptr={}, batch_size={}, keys.shape={}"
                .format(option, ptr, batch_size, tuple(keys.shape))
            ) from exc
        ptr_buf[0] = (ptr + batch_size) % self.K  # advance the ring pointer

    @staticmethod
    def _mean_pool(token_embeddings, input_ids):
        """Mean-pool token embeddings over non-padding (id != 1) positions."""
        mask = input_ids.ne(1)
        # mask[:, :, None] broadcasts over the feature dimension.
        return (token_embeddings * mask[:, :, None]).sum(1) / mask.sum(-1)[:, None]

    def _contrast(self, queries, positives, queue):
        """Build InfoNCE logits/labels: in-batch positives + queued negatives.

        Returns ([bs, bs+K] logits scaled by 1/T, [bs] diagonal labels).
        """
        pos = torch.einsum('nc,bc->nb', [queries, positives])
        neg = torch.einsum('nc,ck->nk', [queries, queue.clone().detach()])
        logits = torch.cat([self.time_score * pos, neg], dim=1)
        logits = logits / self.T  # apply temperature
        labels = torch.arange(logits.size(0), device=logits.device)
        return logits, labels

    def forward(self, source_code_q, source_code_k, nl_q, nl_k):
        """
        Input:
            source_code_q / source_code_k: original and masked code token ids
            nl_q / nl_k: original and masked NL token ids
        Output:
            inter_logits, inter_labels, code query embeddings, NL query embeddings
        """
        if not self.args.do_multi_lang_continue_pre_train:
            # Plain in-batch contrastive loss; no queues or key encoders.
            outputs = self.code_encoder_q(
                source_code_q, attention_mask=source_code_q.ne(1))[0]
            code_q = torch.nn.functional.normalize(
                self._mean_pool(outputs, source_code_q), p=2, dim=1)
            outputs = self.nl_encoder_q(nl_q, attention_mask=nl_q.ne(1))[0]
            nl_q = torch.nn.functional.normalize(
                self._mean_pool(outputs, nl_q), p=2, dim=1)
            code2nl_logits = torch.einsum("ab,cb->ac", code_q, nl_q)
            code2nl_logits /= self.T
            code2nl_label = torch.arange(code2nl_logits.size(0),
                                         device=code2nl_logits.device)
            return code2nl_logits, code2nl_label, None, None

        if self.agg_way == "avg":
            # Query embeddings: masked mean pooling over token states.
            outputs = self.code_encoder_q(
                source_code_q, attention_mask=source_code_q.ne(1))[0]
            code_q = torch.nn.functional.normalize(
                self._mean_pool(outputs, source_code_q), p=2, dim=1)
            outputs = self.nl_encoder_q(nl_q, attention_mask=nl_q.ne(1))[0]
            nl_q = torch.nn.functional.normalize(
                self._mean_pool(outputs, nl_q), p=2, dim=1)

            with torch.no_grad():  # no gradient to keys
                self._momentum_update_key_encoder()  # update the key encoders
                outputs = self.code_encoder_k(
                    source_code_k, attention_mask=source_code_k.ne(1))[0]
                code_k = torch.nn.functional.normalize(
                    self._mean_pool(outputs, source_code_k), p=2, dim=1)
                outputs = self.nl_encoder_k(nl_k, attention_mask=nl_k.ne(1))[0]
                nl_k = torch.nn.functional.normalize(
                    self._mean_pool(outputs, nl_k), p=2, dim=1)

        elif self.agg_way == "cls_pooler":
            # FIX: the original normalized unbound names instead of the
            # pooler outputs just computed (index [1] of the encoder output).
            outputs = self.code_encoder_q(
                source_code_q, attention_mask=source_code_q.ne(1))[1]
            code_q = torch.nn.functional.normalize(outputs, p=2, dim=1)
            outputs = self.nl_encoder_q(nl_q, attention_mask=nl_q.ne(1))[1]
            nl_q = torch.nn.functional.normalize(outputs, p=2, dim=1)

            with torch.no_grad():  # no gradient to keys
                self._momentum_update_key_encoder()
                outputs = self.code_encoder_k(
                    source_code_k, attention_mask=source_code_k.ne(1))[1]
                code_k = torch.nn.functional.normalize(outputs, p=2, dim=1)
                outputs = self.nl_encoder_k(nl_k, attention_mask=nl_k.ne(1))[1]
                nl_k = torch.nn.functional.normalize(outputs, p=2, dim=1)

        elif self.agg_way == "avg_cls_pooler":
            # Sum of the pooler ([1]) and masked-mean ([0]) representations.
            outputs = self.code_encoder_q(
                source_code_q, attention_mask=source_code_q.ne(1))
            code_q = torch.nn.functional.normalize(
                outputs[1] + self._mean_pool(outputs[0], source_code_q),
                p=2, dim=1)
            outputs = self.nl_encoder_q(nl_q, attention_mask=nl_q.ne(1))
            nl_q = torch.nn.functional.normalize(
                outputs[1] + self._mean_pool(outputs[0], nl_q), p=2, dim=1)

            with torch.no_grad():  # no gradient to keys
                self._momentum_update_key_encoder()
                outputs = self.code_encoder_k(
                    source_code_k, attention_mask=source_code_k.ne(1))
                code_k = torch.nn.functional.normalize(
                    outputs[1] + self._mean_pool(outputs[0], source_code_k),
                    p=2, dim=1)
                outputs = self.nl_encoder_k(nl_k, attention_mask=nl_k.ne(1))
                nl_k = torch.nn.functional.normalize(
                    outputs[1] + self._mean_pool(outputs[0], nl_k), p=2, dim=1)

        # Four inter-modal InfoNCE heads: each pairs one query view with one
        # key view and draws negatives from the matching queue.
        code2nl_logits, code2nl_label = self._contrast(
            code_q, nl_q, self.nl_queue)
        code2maskednl_logits, code2maskednl_label = self._contrast(
            code_q, nl_k, self.masked_nl_queue)
        nl2code_logits, nl2code_label = self._contrast(
            nl_q, code_q, self.code_queue)
        nl2maskedcode_logits, nl2maskedcode_label = self._contrast(
            nl_q, code_k, self.masked_code_queue)

        # logits: 4*bs x (bs+K)
        inter_logits = torch.cat((code2nl_logits, code2maskednl_logits,
                                  nl2code_logits, nl2maskedcode_logits), dim=0)
        inter_labels = torch.cat((code2nl_label, code2maskednl_label,
                                  nl2code_label, nl2maskedcode_label), dim=0)

        if self.do_ineer_loss:
            # Two intra-modal heads: original view vs. masked view.
            code2maskedcode_logits, code2maskedcode_label = self._contrast(
                code_q, code_k, self.masked_code_queue)
            nl2maskednl_logits, nl2maskednl_label = self._contrast(
                nl_q, nl_k, self.masked_nl_queue)
            # logits: 6*bs x (bs+K)
            inter_logits = torch.cat((inter_logits, code2maskedcode_logits,
                                      nl2maskednl_logits), dim=0)
            inter_labels = torch.cat((inter_labels, code2maskedcode_label,
                                      nl2maskednl_label), dim=0)

        # Dequeue the oldest negatives and enqueue this batch.
        self._dequeue_and_enqueue(code_q, option='code')
        self._dequeue_and_enqueue(nl_q, option='nl')
        self._dequeue_and_enqueue(code_k, option='masked_code')
        self._dequeue_and_enqueue(nl_k, option='masked_nl')

        return inter_logits, inter_labels, code_q, nl_q
|
396 |
+
|
saved_models/codesearch_contrastive_learning/Model/model.py
ADDED
@@ -0,0 +1,453 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import os
|
4 |
+
|
5 |
+
|
6 |
+
__all__ = [
|
7 |
+
"ResNet",
|
8 |
+
"resnet18_with_dropout",
|
9 |
+
"resnet18",
|
10 |
+
"dropout_resnet18"
|
11 |
+
]
|
12 |
+
|
13 |
+
|
14 |
+
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding (bias-free; a BatchNorm follows it)."""
    # Padding tracks dilation so the spatial size is preserved at stride 1.
    conv_kwargs = dict(
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        groups=groups,
        bias=False,
    )
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
|
26 |
+
|
27 |
+
|
28 |
+
def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution (channel projection, bias-free)."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
|
31 |
+
|
32 |
+
class BasicBlock(nn.Module):
    """Two-convolution residual block used by ResNet-18/34."""

    expansion = 1  # output channels = planes * expansion

    def __init__(
        self,
        inplanes,
        planes,
        stride=1,
        downsample=None,
        groups=1,
        base_width=64,
        dilation=1,
        norm_layer=None,
    ):
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError("BasicBlock only supports groups=1 and base_width=64")
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # conv1 (and downsample, when given) performs the spatial stride.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Project the shortcut when shapes differ; otherwise pass x through.
        shortcut = x if self.downsample is None else self.downsample(x)
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        return self.relu(y + shortcut)
|
80 |
+
|
81 |
+
class BasicBlock_withDropout(nn.Module):
    """BasicBlock variant that instantiates a Dropout layer.

    NOTE(review): ``self.dropout`` is created in __init__ but never applied
    in ``forward`` — the forward pass is identical to BasicBlock. Confirm
    whether dropout was meant to be used inside the block (the enclosing
    ResNet applies dropout only in its classifier head).
    """

    expansion = 1  # output channels = planes * expansion

    def __init__(
        self,
        inplanes,
        planes,
        stride=1,
        downsample=None,
        groups=1,
        base_width=64,
        dilation=1,
        norm_layer=None,
    ):
        super(BasicBlock_withDropout, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError("BasicBlock only supports groups=1 and base_width=64")
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # conv1 (and downsample, when given) performs the spatial stride.
        self.dropout = nn.Dropout(p=0.5)  # unused in forward (see class note)
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Project the shortcut when shapes differ; otherwise pass x through.
        shortcut = x if self.downsample is None else self.downsample(x)
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))
        return self.relu(y + shortcut)
|
131 |
+
|
132 |
+
|
133 |
+
class Bottleneck(nn.Module):
    """Three-layer bottleneck residual block (1x1 reduce -> 3x3 -> 1x1 expand)."""

    expansion = 4  # output channels = planes * expansion

    def __init__(
        self,
        inplanes,
        planes,
        stride=1,
        downsample=None,
        groups=1,
        base_width=64,
        dilation=1,
        norm_layer=None,
    ):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        # Inner width scales with base_width and groups (ResNeXt-style).
        width = int(planes * (base_width / 64.0)) * groups
        # conv2 (and downsample, when given) performs the spatial stride.
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Project the shortcut when shapes differ; otherwise pass x through.
        shortcut = x if self.downsample is None else self.downsample(x)
        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))
        return self.relu(y + shortcut)
|
183 |
+
|
184 |
+
|
185 |
+
class ResNet(nn.Module):
    """CIFAR-style ResNet: 3x3/stride-1 stem instead of ImageNet's 7x7/stride-2.

    Args:
        block: residual block class (BasicBlock, BasicBlock_withDropout,
            or Bottleneck).
        layers: blocks per stage, e.g. [2, 2, 2, 2] for ResNet-18.
        with_dropout: when True, the classifier head becomes
            Flatten -> Dropout(0.5) -> Linear.
        num_classes: size of the output layer (default 10, CIFAR-10).
        zero_init_residual: zero-init the last BN of each residual branch so
            blocks start as identities (https://arxiv.org/abs/1706.02677).
    """

    def __init__(
        self,
        block,
        layers,
        with_dropout,
        num_classes=10,
        zero_init_residual=False,
        groups=1,
        width_per_group=64,
        replace_stride_with_dilation=None,
        norm_layer=None,
    ):
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # Each flag selects whether the matching stage replaces its
            # 2x2 stride with a dilated convolution instead.
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError(
                "replace_stride_with_dilation should be None "
                "or a 3-element tuple, got {}".format(replace_stride_with_dilation)
            )

        self.with_dropout = with_dropout
        self.groups = groups
        self.base_width = width_per_group

        # CIFAR10 stem: kernel_size 7 -> 3, stride 2 -> 1, padding 3 -> 1.
        self.conv1 = nn.Conv2d(
            3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(
            block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0]
        )
        self.layer3 = self._make_layer(
            block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1]
        )
        self.layer4 = self._make_layer(
            block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2]
        )
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)
        if self.with_dropout:
            # Replace the plain head; Flatten is a no-op on the already-flat
            # feature vector but keeps the head usable on 4-D input too.
            self.fc = nn.Sequential(
                nn.Flatten(),
                nn.Dropout(0.5),
                nn.Linear(512 * block.expansion, num_classes),
            )

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(
                    module.weight, mode="fan_out", nonlinearity="relu"
                )
            elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

        # Zero-init the last BN in each residual branch so every block starts
        # as an identity mapping (improves accuracy per the paper above).
        if zero_init_residual:
            for module in self.modules():
                if isinstance(module, Bottleneck):
                    nn.init.constant_(module.bn3.weight, 0)
                elif isinstance(module, BasicBlock):
                    nn.init.constant_(module.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Build one stage: a strided (or dilated) block plus identity blocks."""
        norm_layer = self._norm_layer
        previous_dilation = self.dilation
        if dilate:
            # Trade the stride for dilation; spatial size is kept.
            self.dilation *= stride
            stride = 1
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            # Shortcut projection to match shape of the residual branch.
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        stage = [
            block(
                self.inplanes,
                planes,
                stride,
                downsample,
                self.groups,
                self.base_width,
                previous_dilation,
                norm_layer,
            )
        ]
        self.inplanes = planes * block.expansion
        stage.extend(
            block(
                self.inplanes,
                planes,
                groups=self.groups,
                base_width=self.base_width,
                dilation=self.dilation,
                norm_layer=norm_layer,
            )
            for _ in range(blocks - 1)
        )
        return nn.Sequential(*stage)

    def forward(self, x):
        """Full pass: backbone features followed by the classifier head."""
        return self.prediction(self.feature(x))

    def feature(self, x):
        """Backbone only: returns the flattened pooled feature vector."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        x = self.avgpool(x)
        return x.reshape(x.size(0), -1)

    def prediction(self, x):
        """Classifier head applied to pre-computed features."""
        return self.fc(x)
|
344 |
+
|
345 |
+
# def gap(self, x):
|
346 |
+
# x = self.conv1(x)
|
347 |
+
# x = self.bn1(x)
|
348 |
+
# x = self.relu(x)
|
349 |
+
# x = self.maxpool(x)
|
350 |
+
|
351 |
+
# x = self.layer1(x)
|
352 |
+
# x = self.layer2(x)
|
353 |
+
# x = self.layer3(x)
|
354 |
+
# x = self.layer4(x)
|
355 |
+
|
356 |
+
# x = self.avgpool(x)
|
357 |
+
# x = x.reshape(x.size(0), -1)
|
358 |
+
# return x
|
359 |
+
|
360 |
+
|
361 |
+
def _resnet(arch, block, layers, pretrained, progress, device, with_dropout, **kwargs):
    """Build a ResNet and optionally load a locally stored state dict.

    Weights are expected at ``<this dir>/state_dicts/<arch>.pt``.
    ``progress`` is accepted for API compatibility but unused here.
    """
    model = ResNet(block, layers, with_dropout, **kwargs)
    if pretrained:
        weights_path = os.path.dirname(__file__) + "/state_dicts/" + arch + ".pt"
        state_dict = torch.load(weights_path, map_location=device)
        model.load_state_dict(state_dict)
    return model
|
370 |
+
|
371 |
+
|
372 |
+
def resnet18_with_dropout(pretrained=False, progress=True, device="cpu", **kwargs):
    """Constructs a ResNet-18 model with a Dropout classifier head.

    Args:
        pretrained (bool): If True, loads locally stored pretrained weights
        progress (bool): accepted for API compatibility (unused downstream)
    """
    return _resnet(
        "resnet18",
        BasicBlock_withDropout,
        [2, 2, 2, 2],
        pretrained,
        progress,
        device,
        with_dropout=True,
        **kwargs
    )
|
381 |
+
|
382 |
+
def resnet18(pretrained=False, progress=True, device="cpu", **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, loads locally stored pretrained weights
        progress (bool): accepted for API compatibility (unused downstream)
    """
    return _resnet(
        "resnet18",
        BasicBlock,
        [2, 2, 2, 2],
        pretrained,
        progress,
        device,
        with_dropout=False,
        **kwargs
    )
|
391 |
+
|
392 |
+
|
393 |
+
def resnet34(pretrained=False, progress=True, device="cpu", **kwargs):
    """Constructs a ResNet-34 model.

    Args:
        pretrained (bool): If True, loads locally stored pretrained weights
        progress (bool): accepted for API compatibility (unused downstream)
    """
    # FIX: the original omitted _resnet's required ``with_dropout`` argument,
    # so every call to resnet34() raised TypeError. Pass False to match the
    # plain (no-dropout) head used by resnet18().
    return _resnet(
        "resnet34", BasicBlock, [3, 4, 6, 3], pretrained, progress, device,
        with_dropout=False, **kwargs
    )
|
402 |
+
|
403 |
+
|
404 |
+
def resnet50(pretrained=False, progress=True, device="cpu", **kwargs):
|
405 |
+
"""Constructs a ResNet-50 model.
|
406 |
+
Args:
|
407 |
+
pretrained (bool): If True, returns a model pre-trained on ImageNet
|
408 |
+
progress (bool): If True, displays a progress bar of the download to stderr
|
409 |
+
"""
|
410 |
+
return _resnet(
|
411 |
+
"resnet50", Bottleneck, [3, 4, 6, 3], pretrained, progress, device, **kwargs
|
412 |
+
)
|
413 |
+
|
414 |
+
# class dropout_residual(nn.Module):
|
415 |
+
# def __init__(self, input_channels, num_channels, dropout_rate, dropout_type, init_dict, use_1x1conv=False, strides=1, **kwargs):
|
416 |
+
# super().__init__(**kwargs)
|
417 |
+
# self.conv1 = Dropout_Conv2D(input_channels, num_channels, kernel_size=3, padding=1, stride=strides, dropout_rate=dropout_rate, dropout_type=dropout_type, init_dict=init_dict)
|
418 |
+
# self.conv2 = Dropout_Conv2D(num_channels, num_channels, kernel_size=3, padding=1, dropout_rate=dropout_rate, dropout_type=dropout_type, init_dict=init_dict)
|
419 |
+
|
420 |
+
# if use_1x1conv:
|
421 |
+
# self.conv3 = Dropout_Conv2D(input_channels, num_channels, kernel_size=1, stride=strides, dropout_rate=dropout_rate, dropout_type=dropout_type)
|
422 |
+
# else:
|
423 |
+
# self.conv3 = None
|
424 |
+
|
425 |
+
# self.bn1 = nn.BatchNorm2d(num_channels)
|
426 |
+
# self.bn2 = nn.BatchNorm2d(num_channels)
|
427 |
+
|
428 |
+
# def dropout_resnet_block(input_channels, num_channels, num_residuals, dropout_rate, dropout_type, init_dict, first_block=False):
|
429 |
+
# blk = []
|
430 |
+
# for i in range(num_residuals):
|
431 |
+
# if i == 0 and not first_block:
|
432 |
+
# blk.append(dropout_residual(input_channels, num_channels, dropout_rate=dropout_rate, dropout_type=dropout_type, init_dict=init_dict, use_1x1conv=True, strides=2))
|
433 |
+
# else:
|
434 |
+
# blk.append(dropout_residual(num_channels, num_channels, dropout_rate=dropout_rate, dropout_type=dropout_type, init_dict=init_dict))
|
435 |
+
# return blk
|
436 |
+
|
437 |
+
# def dropout_resnet18(dropout_rate=0.5, dropout_type="w", init_dict=dict()):
|
438 |
+
# b1 = nn.Sequential(
|
439 |
+
# Dropout_Conv2D(1, 64, kernel_size=7, stride=2, padding=3, dropout_rate=dropout_rate, dropout_type=dropout_type, init_dict=init_dict),
|
440 |
+
# nn.BatchNorm2d(64),
|
441 |
+
# nn.ReLU(),
|
442 |
+
# nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
|
443 |
+
# )
|
444 |
+
# b2 = nn.Sequential(*dropout_resnet_block(64, 64, 2, dropout_rate=dropout_rate, dropout_type=dropout_type, init_dict=init_dict, first_block=True))
|
445 |
+
# b3 = nn.Sequential(*dropout_resnet_block(64, 128, 2, dropout_rate=dropout_rate, dropout_type=dropout_type, init_dict=init_dict))
|
446 |
+
# b4 = nn.Sequential(*dropout_resnet_block(128, 256, 2, dropout_rate=dropout_rate, dropout_type=dropout_type, init_dict=init_dict))
|
447 |
+
# b5 = nn.Sequential(*dropout_resnet_block(256, 512, 2, dropout_rate=dropout_rate, dropout_type=dropout_type, init_dict=init_dict))
|
448 |
+
|
449 |
+
# return nn.Sequential(b1, b2, b3, b4, b5,
|
450 |
+
# nn.AdaptiveAvgPool2d((1,1)),
|
451 |
+
# nn.Flatten(),
|
452 |
+
# Dropout_Linear(512, 20, dropout_rate=dropout_rate, dropout_type=dropout_type, init_dict=init_dict))
|
453 |
+
|
saved_models/codesearch_contrastive_learning/Model/time_base_dvi.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"complex_construction": {"1": 11.107, "2": 11.239}, "training": {"1": 210.918, "2": 216.733}}
|
saved_models/codesearch_contrastive_learning/Testing_data/testing_dataset_label.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4d5db38fa60eca28dbc4c1b14f5833385f954d95f46536e3c30212004bb1f73
|
3 |
+
size 2955
|
saved_models/codesearch_contrastive_learning/Training_data/training_dataset_label.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8821e7f4b0cec0c3d59d148845a4bd1e0f4f0be576e3da2d1bc97fb74a663eea
|
3 |
+
size 50445
|
saved_models/codesearch_contrastive_learning/config.json
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"SETTING": "normal",
|
3 |
+
"CLASSES": ["plane", "car", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"],
|
4 |
+
"DATASET": "cifar10",
|
5 |
+
"EPOCH_START": 1,
|
6 |
+
"EPOCH_END": 200,
|
7 |
+
"EPOCH_PERIOD": 1,
|
8 |
+
"GPU":0,
|
9 |
+
"TRAINING": {
|
10 |
+
"NET": "resnet18",
|
11 |
+
"loader_tr_args": {"batch_size": 128, "num_workers": 1},
|
12 |
+
"loader_te_args": {"batch_size": 1000, "num_workers": 1},
|
13 |
+
"optimizer_args": {"lr": 0.1, "momentum": 0.9, "weight_decay": 5e-4},
|
14 |
+
"num_class": 10,
|
15 |
+
"train_num": 50000,
|
16 |
+
"test_num": 10000,
|
17 |
+
"milestone":[160]
|
18 |
+
},
|
19 |
+
"VISUALIZATION":{
|
20 |
+
|
21 |
+
"S_LAMBDA":1,
|
22 |
+
"PREPROCESS":0,
|
23 |
+
"BOUNDARY":{
|
24 |
+
"B_N_EPOCHS": 0,
|
25 |
+
"L_BOUND":0.6
|
26 |
+
},
|
27 |
+
"INIT_NUM":300,
|
28 |
+
|
29 |
+
"ALPHA":0,
|
30 |
+
"BETA":0.1,
|
31 |
+
"MAX_HAUSDORFF":0.4,
|
32 |
+
|
33 |
+
"LAMBDA": 10.0,
|
34 |
+
"HIDDEN_LAYER":4,
|
35 |
+
"ENCODER_DIMS": [512,256,256,256,256,2],
|
36 |
+
"DECODER_DIMS": [2,256,256,256,256,512],
|
37 |
+
"N_NEIGHBORS":15,
|
38 |
+
"MAX_EPOCH": 20,
|
39 |
+
"S_N_EPOCHS": 5,
|
40 |
+
"T_N_EPOCHS": 100,
|
41 |
+
"PATIENT": 3,
|
42 |
+
"RESOLUTION":300,
|
43 |
+
"VIS_MODEL_NAME": "vis",
|
44 |
+
"EVALUATION_NAME": "evalution"
|
45 |
+
},
|
46 |
+
"DVI": {
|
47 |
+
"SETTING": "normal",
|
48 |
+
"CLASSES": ["0"],
|
49 |
+
"DATASET": "cifar10",
|
50 |
+
"GPU": "0",
|
51 |
+
"EPOCH_START": 1,
|
52 |
+
"EPOCH_END": 2,
|
53 |
+
"EPOCH_PERIOD": 1,
|
54 |
+
"EPOCH_NAME":"Epoch",
|
55 |
+
"TRAINING": {
|
56 |
+
"NET": "resnet18",
|
57 |
+
"loader_tr_args": {
|
58 |
+
"batch_size": 128,
|
59 |
+
"num_workers": 1
|
60 |
+
},
|
61 |
+
"loader_te_args": {
|
62 |
+
"batch_size": 1000,
|
63 |
+
"num_workers": 1
|
64 |
+
},
|
65 |
+
"optimizer_args": {
|
66 |
+
"lr": 0.1,
|
67 |
+
"momentum": 0.9,
|
68 |
+
"weight_decay": 0.0005
|
69 |
+
},
|
70 |
+
"num_class": 1,
|
71 |
+
"train_num": 24927,
|
72 |
+
"test_num": 1261,
|
73 |
+
"milestone": [
|
74 |
+
10
|
75 |
+
]
|
76 |
+
},
|
77 |
+
"VISUALIZATION": {
|
78 |
+
"PREPROCESS": 1,
|
79 |
+
"BOUNDARY": {
|
80 |
+
"B_N_EPOCHS": 0,
|
81 |
+
"L_BOUND": 0.4
|
82 |
+
},
|
83 |
+
"BATCH_SIZE":1000,
|
84 |
+
"LAMBDA1": 5.0,
|
85 |
+
"LAMBDA2": 0.3,
|
86 |
+
|
87 |
+
"ENCODER_DIMS_O": [128,64,64,64,64,2],
|
88 |
+
"DECODER_DIMS_O": [2,64,64,64,64,128],
|
89 |
+
"ENCODER_DIMS": [768,256,256,256,256,2],
|
90 |
+
"DECODER_DIMS": [2,256,256,256,256,768],
|
91 |
+
"N_NEIGHBORS": 15,
|
92 |
+
"MAX_EPOCH": 20,
|
93 |
+
"S_N_EPOCHS": 10,
|
94 |
+
"PATIENT": 3,
|
95 |
+
"RESOLUTION": 300,
|
96 |
+
"VIS_MODEL_NAME": "dvi",
|
97 |
+
"FLAG": "_temporal_id_withoutB",
|
98 |
+
"EVALUATION_NAME": "evaluation_tfDVI"
|
99 |
+
}
|
100 |
+
}
|
101 |
+
}
|
saved_models/codesearch_contrastive_learning/config_dvi_modi.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"SETTING": "normal",
|
3 |
+
"CLASSES": [
|
4 |
+
"0"
|
5 |
+
],
|
6 |
+
"GPU": "0",
|
7 |
+
"DATASET": "cifar10",
|
8 |
+
"EPOCH_START": 1,
|
9 |
+
"EPOCH_END": 1,
|
10 |
+
"EPOCH_PERIOD": 1,
|
11 |
+
"TRAINING": {
|
12 |
+
"NET": "resnet18",
|
13 |
+
"num_class": 1,
|
14 |
+
"train_num": 24927,
|
15 |
+
"test_num": 1261
|
16 |
+
},
|
17 |
+
"VISUALIZATION": {
|
18 |
+
"PREPROCESS": 1,
|
19 |
+
"BOUNDARY": {
|
20 |
+
"B_N_EPOCHS": 0,
|
21 |
+
"L_BOUND": 0.5
|
22 |
+
},
|
23 |
+
"INIT_NUM": 300,
|
24 |
+
"ALPHA": 1,
|
25 |
+
"BETA": 1,
|
26 |
+
"MAX_HAUSDORFF": 0.33,
|
27 |
+
"LAMBDA1": 1,
|
28 |
+
"LAMBDA2": 0.3,
|
29 |
+
"S_LAMBDA": 1,
|
30 |
+
"ENCODER_DIMS": [
|
31 |
+
768,
|
32 |
+
256,
|
33 |
+
256,
|
34 |
+
256,
|
35 |
+
256,
|
36 |
+
2
|
37 |
+
],
|
38 |
+
"DECODER_DIMS": [
|
39 |
+
2,
|
40 |
+
256,
|
41 |
+
256,
|
42 |
+
256,
|
43 |
+
256,
|
44 |
+
768
|
45 |
+
],
|
46 |
+
"N_NEIGHBORS": 15,
|
47 |
+
"MAX_EPOCH": 200,
|
48 |
+
"S_N_EPOCHS": 5,
|
49 |
+
"T_N_EPOCHS": 20,
|
50 |
+
"PATIENT": 3,
|
51 |
+
"RESOLUTION": 300,
|
52 |
+
"VIS_MODEL_NAME": "dvi",
|
53 |
+
"EVALUATION_NAME": "test_evaluation_dvi"
|
54 |
+
}
|
55 |
+
}
|
saved_models/codesearch_contrastive_learning/iteration_structure.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"value": 1,
|
4 |
+
"name": "code",
|
5 |
+
"pid": ""
|
6 |
+
},
|
7 |
+
{
|
8 |
+
"value": 2,
|
9 |
+
"name": "query",
|
10 |
+
"pid": 1
|
11 |
+
}
|
12 |
+
]
|
saved_models/fine_tune/Ruby/running.log
ADDED
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
02/17/2024 13:45:08 - INFO - __main__ - device: cuda, n_gpu: 1
|
2 |
+
02/17/2024 13:45:12 - INFO - __main__ - +------------------------------------------------------------+--------------+----------+
|
3 |
+
| Layer Name | Output Shape | Param # |
|
4 |
+
+------------------------------------------------------------+--------------+----------+
|
5 |
+
| encoder.embeddings.word_embeddings.weight | [51451, 768] | 39514368 |
|
6 |
+
| encoder.embeddings.position_embeddings.weight | [1026, 768] | 787968 |
|
7 |
+
| encoder.embeddings.token_type_embeddings.weight | [10, 768] | 7680 |
|
8 |
+
| encoder.embeddings.LayerNorm.weight | [768] | 768 |
|
9 |
+
| encoder.embeddings.LayerNorm.bias | [768] | 768 |
|
10 |
+
| encoder.encoder.layer.0.attention.self.query.weight | [768, 768] | 589824 |
|
11 |
+
| encoder.encoder.layer.0.attention.self.query.bias | [768] | 768 |
|
12 |
+
| encoder.encoder.layer.0.attention.self.key.weight | [768, 768] | 589824 |
|
13 |
+
| encoder.encoder.layer.0.attention.self.key.bias | [768] | 768 |
|
14 |
+
| encoder.encoder.layer.0.attention.self.value.weight | [768, 768] | 589824 |
|
15 |
+
| encoder.encoder.layer.0.attention.self.value.bias | [768] | 768 |
|
16 |
+
| encoder.encoder.layer.0.attention.output.dense.weight | [768, 768] | 589824 |
|
17 |
+
| encoder.encoder.layer.0.attention.output.dense.bias | [768] | 768 |
|
18 |
+
| encoder.encoder.layer.0.attention.output.LayerNorm.weight | [768] | 768 |
|
19 |
+
| encoder.encoder.layer.0.attention.output.LayerNorm.bias | [768] | 768 |
|
20 |
+
| encoder.encoder.layer.0.intermediate.dense.weight | [3072, 768] | 2359296 |
|
21 |
+
| encoder.encoder.layer.0.intermediate.dense.bias | [3072] | 3072 |
|
22 |
+
| encoder.encoder.layer.0.output.dense.weight | [768, 3072] | 2359296 |
|
23 |
+
| encoder.encoder.layer.0.output.dense.bias | [768] | 768 |
|
24 |
+
| encoder.encoder.layer.0.output.LayerNorm.weight | [768] | 768 |
|
25 |
+
| encoder.encoder.layer.0.output.LayerNorm.bias | [768] | 768 |
|
26 |
+
| encoder.encoder.layer.1.attention.self.query.weight | [768, 768] | 589824 |
|
27 |
+
| encoder.encoder.layer.1.attention.self.query.bias | [768] | 768 |
|
28 |
+
| encoder.encoder.layer.1.attention.self.key.weight | [768, 768] | 589824 |
|
29 |
+
| encoder.encoder.layer.1.attention.self.key.bias | [768] | 768 |
|
30 |
+
| encoder.encoder.layer.1.attention.self.value.weight | [768, 768] | 589824 |
|
31 |
+
| encoder.encoder.layer.1.attention.self.value.bias | [768] | 768 |
|
32 |
+
| encoder.encoder.layer.1.attention.output.dense.weight | [768, 768] | 589824 |
|
33 |
+
| encoder.encoder.layer.1.attention.output.dense.bias | [768] | 768 |
|
34 |
+
| encoder.encoder.layer.1.attention.output.LayerNorm.weight | [768] | 768 |
|
35 |
+
| encoder.encoder.layer.1.attention.output.LayerNorm.bias | [768] | 768 |
|
36 |
+
| encoder.encoder.layer.1.intermediate.dense.weight | [3072, 768] | 2359296 |
|
37 |
+
| encoder.encoder.layer.1.intermediate.dense.bias | [3072] | 3072 |
|
38 |
+
| encoder.encoder.layer.1.output.dense.weight | [768, 3072] | 2359296 |
|
39 |
+
| encoder.encoder.layer.1.output.dense.bias | [768] | 768 |
|
40 |
+
| encoder.encoder.layer.1.output.LayerNorm.weight | [768] | 768 |
|
41 |
+
| encoder.encoder.layer.1.output.LayerNorm.bias | [768] | 768 |
|
42 |
+
| encoder.encoder.layer.2.attention.self.query.weight | [768, 768] | 589824 |
|
43 |
+
| encoder.encoder.layer.2.attention.self.query.bias | [768] | 768 |
|
44 |
+
| encoder.encoder.layer.2.attention.self.key.weight | [768, 768] | 589824 |
|
45 |
+
| encoder.encoder.layer.2.attention.self.key.bias | [768] | 768 |
|
46 |
+
| encoder.encoder.layer.2.attention.self.value.weight | [768, 768] | 589824 |
|
47 |
+
| encoder.encoder.layer.2.attention.self.value.bias | [768] | 768 |
|
48 |
+
| encoder.encoder.layer.2.attention.output.dense.weight | [768, 768] | 589824 |
|
49 |
+
| encoder.encoder.layer.2.attention.output.dense.bias | [768] | 768 |
|
50 |
+
| encoder.encoder.layer.2.attention.output.LayerNorm.weight | [768] | 768 |
|
51 |
+
| encoder.encoder.layer.2.attention.output.LayerNorm.bias | [768] | 768 |
|
52 |
+
| encoder.encoder.layer.2.intermediate.dense.weight | [3072, 768] | 2359296 |
|
53 |
+
| encoder.encoder.layer.2.intermediate.dense.bias | [3072] | 3072 |
|
54 |
+
| encoder.encoder.layer.2.output.dense.weight | [768, 3072] | 2359296 |
|
55 |
+
| encoder.encoder.layer.2.output.dense.bias | [768] | 768 |
|
56 |
+
| encoder.encoder.layer.2.output.LayerNorm.weight | [768] | 768 |
|
57 |
+
| encoder.encoder.layer.2.output.LayerNorm.bias | [768] | 768 |
|
58 |
+
| encoder.encoder.layer.3.attention.self.query.weight | [768, 768] | 589824 |
|
59 |
+
| encoder.encoder.layer.3.attention.self.query.bias | [768] | 768 |
|
60 |
+
| encoder.encoder.layer.3.attention.self.key.weight | [768, 768] | 589824 |
|
61 |
+
| encoder.encoder.layer.3.attention.self.key.bias | [768] | 768 |
|
62 |
+
| encoder.encoder.layer.3.attention.self.value.weight | [768, 768] | 589824 |
|
63 |
+
| encoder.encoder.layer.3.attention.self.value.bias | [768] | 768 |
|
64 |
+
| encoder.encoder.layer.3.attention.output.dense.weight | [768, 768] | 589824 |
|
65 |
+
| encoder.encoder.layer.3.attention.output.dense.bias | [768] | 768 |
|
66 |
+
| encoder.encoder.layer.3.attention.output.LayerNorm.weight | [768] | 768 |
|
67 |
+
| encoder.encoder.layer.3.attention.output.LayerNorm.bias | [768] | 768 |
|
68 |
+
| encoder.encoder.layer.3.intermediate.dense.weight | [3072, 768] | 2359296 |
|
69 |
+
| encoder.encoder.layer.3.intermediate.dense.bias | [3072] | 3072 |
|
70 |
+
| encoder.encoder.layer.3.output.dense.weight | [768, 3072] | 2359296 |
|
71 |
+
| encoder.encoder.layer.3.output.dense.bias | [768] | 768 |
|
72 |
+
| encoder.encoder.layer.3.output.LayerNorm.weight | [768] | 768 |
|
73 |
+
| encoder.encoder.layer.3.output.LayerNorm.bias | [768] | 768 |
|
74 |
+
| encoder.encoder.layer.4.attention.self.query.weight | [768, 768] | 589824 |
|
75 |
+
| encoder.encoder.layer.4.attention.self.query.bias | [768] | 768 |
|
76 |
+
| encoder.encoder.layer.4.attention.self.key.weight | [768, 768] | 589824 |
|
77 |
+
| encoder.encoder.layer.4.attention.self.key.bias | [768] | 768 |
|
78 |
+
| encoder.encoder.layer.4.attention.self.value.weight | [768, 768] | 589824 |
|
79 |
+
| encoder.encoder.layer.4.attention.self.value.bias | [768] | 768 |
|
80 |
+
| encoder.encoder.layer.4.attention.output.dense.weight | [768, 768] | 589824 |
|
81 |
+
| encoder.encoder.layer.4.attention.output.dense.bias | [768] | 768 |
|
82 |
+
| encoder.encoder.layer.4.attention.output.LayerNorm.weight | [768] | 768 |
|
83 |
+
| encoder.encoder.layer.4.attention.output.LayerNorm.bias | [768] | 768 |
|
84 |
+
| encoder.encoder.layer.4.intermediate.dense.weight | [3072, 768] | 2359296 |
|
85 |
+
| encoder.encoder.layer.4.intermediate.dense.bias | [3072] | 3072 |
|
86 |
+
| encoder.encoder.layer.4.output.dense.weight | [768, 3072] | 2359296 |
|
87 |
+
| encoder.encoder.layer.4.output.dense.bias | [768] | 768 |
|
88 |
+
| encoder.encoder.layer.4.output.LayerNorm.weight | [768] | 768 |
|
89 |
+
| encoder.encoder.layer.4.output.LayerNorm.bias | [768] | 768 |
|
90 |
+
| encoder.encoder.layer.5.attention.self.query.weight | [768, 768] | 589824 |
|
91 |
+
| encoder.encoder.layer.5.attention.self.query.bias | [768] | 768 |
|
92 |
+
| encoder.encoder.layer.5.attention.self.key.weight | [768, 768] | 589824 |
|
93 |
+
| encoder.encoder.layer.5.attention.self.key.bias | [768] | 768 |
|
94 |
+
| encoder.encoder.layer.5.attention.self.value.weight | [768, 768] | 589824 |
|
95 |
+
| encoder.encoder.layer.5.attention.self.value.bias | [768] | 768 |
|
96 |
+
| encoder.encoder.layer.5.attention.output.dense.weight | [768, 768] | 589824 |
|
97 |
+
| encoder.encoder.layer.5.attention.output.dense.bias | [768] | 768 |
|
98 |
+
| encoder.encoder.layer.5.attention.output.LayerNorm.weight | [768] | 768 |
|
99 |
+
| encoder.encoder.layer.5.attention.output.LayerNorm.bias | [768] | 768 |
|
100 |
+
| encoder.encoder.layer.5.intermediate.dense.weight | [3072, 768] | 2359296 |
|
101 |
+
| encoder.encoder.layer.5.intermediate.dense.bias | [3072] | 3072 |
|
102 |
+
| encoder.encoder.layer.5.output.dense.weight | [768, 3072] | 2359296 |
|
103 |
+
| encoder.encoder.layer.5.output.dense.bias | [768] | 768 |
|
104 |
+
| encoder.encoder.layer.5.output.LayerNorm.weight | [768] | 768 |
|
105 |
+
| encoder.encoder.layer.5.output.LayerNorm.bias | [768] | 768 |
|
106 |
+
| encoder.encoder.layer.6.attention.self.query.weight | [768, 768] | 589824 |
|
107 |
+
| encoder.encoder.layer.6.attention.self.query.bias | [768] | 768 |
|
108 |
+
| encoder.encoder.layer.6.attention.self.key.weight | [768, 768] | 589824 |
|
109 |
+
| encoder.encoder.layer.6.attention.self.key.bias | [768] | 768 |
|
110 |
+
| encoder.encoder.layer.6.attention.self.value.weight | [768, 768] | 589824 |
|
111 |
+
| encoder.encoder.layer.6.attention.self.value.bias | [768] | 768 |
|
112 |
+
| encoder.encoder.layer.6.attention.output.dense.weight | [768, 768] | 589824 |
|
113 |
+
| encoder.encoder.layer.6.attention.output.dense.bias | [768] | 768 |
|
114 |
+
| encoder.encoder.layer.6.attention.output.LayerNorm.weight | [768] | 768 |
|
115 |
+
| encoder.encoder.layer.6.attention.output.LayerNorm.bias | [768] | 768 |
|
116 |
+
| encoder.encoder.layer.6.intermediate.dense.weight | [3072, 768] | 2359296 |
|
117 |
+
| encoder.encoder.layer.6.intermediate.dense.bias | [3072] | 3072 |
|
118 |
+
| encoder.encoder.layer.6.output.dense.weight | [768, 3072] | 2359296 |
|
119 |
+
| encoder.encoder.layer.6.output.dense.bias | [768] | 768 |
|
120 |
+
| encoder.encoder.layer.6.output.LayerNorm.weight | [768] | 768 |
|
121 |
+
| encoder.encoder.layer.6.output.LayerNorm.bias | [768] | 768 |
|
122 |
+
| encoder.encoder.layer.7.attention.self.query.weight | [768, 768] | 589824 |
|
123 |
+
| encoder.encoder.layer.7.attention.self.query.bias | [768] | 768 |
|
124 |
+
| encoder.encoder.layer.7.attention.self.key.weight | [768, 768] | 589824 |
|
125 |
+
| encoder.encoder.layer.7.attention.self.key.bias | [768] | 768 |
|
126 |
+
| encoder.encoder.layer.7.attention.self.value.weight | [768, 768] | 589824 |
|
127 |
+
| encoder.encoder.layer.7.attention.self.value.bias | [768] | 768 |
|
128 |
+
| encoder.encoder.layer.7.attention.output.dense.weight | [768, 768] | 589824 |
|
129 |
+
| encoder.encoder.layer.7.attention.output.dense.bias | [768] | 768 |
|
130 |
+
| encoder.encoder.layer.7.attention.output.LayerNorm.weight | [768] | 768 |
|
131 |
+
| encoder.encoder.layer.7.attention.output.LayerNorm.bias | [768] | 768 |
|
132 |
+
| encoder.encoder.layer.7.intermediate.dense.weight | [3072, 768] | 2359296 |
|
133 |
+
| encoder.encoder.layer.7.intermediate.dense.bias | [3072] | 3072 |
|
134 |
+
| encoder.encoder.layer.7.output.dense.weight | [768, 3072] | 2359296 |
|
135 |
+
| encoder.encoder.layer.7.output.dense.bias | [768] | 768 |
|
136 |
+
| encoder.encoder.layer.7.output.LayerNorm.weight | [768] | 768 |
|
137 |
+
| encoder.encoder.layer.7.output.LayerNorm.bias | [768] | 768 |
|
138 |
+
| encoder.encoder.layer.8.attention.self.query.weight | [768, 768] | 589824 |
|
139 |
+
| encoder.encoder.layer.8.attention.self.query.bias | [768] | 768 |
|
140 |
+
| encoder.encoder.layer.8.attention.self.key.weight | [768, 768] | 589824 |
|
141 |
+
| encoder.encoder.layer.8.attention.self.key.bias | [768] | 768 |
|
142 |
+
| encoder.encoder.layer.8.attention.self.value.weight | [768, 768] | 589824 |
|
143 |
+
| encoder.encoder.layer.8.attention.self.value.bias | [768] | 768 |
|
144 |
+
| encoder.encoder.layer.8.attention.output.dense.weight | [768, 768] | 589824 |
|
145 |
+
| encoder.encoder.layer.8.attention.output.dense.bias | [768] | 768 |
|
146 |
+
| encoder.encoder.layer.8.attention.output.LayerNorm.weight | [768] | 768 |
|
147 |
+
| encoder.encoder.layer.8.attention.output.LayerNorm.bias | [768] | 768 |
|
148 |
+
| encoder.encoder.layer.8.intermediate.dense.weight | [3072, 768] | 2359296 |
|
149 |
+
| encoder.encoder.layer.8.intermediate.dense.bias | [3072] | 3072 |
|
150 |
+
| encoder.encoder.layer.8.output.dense.weight | [768, 3072] | 2359296 |
|
151 |
+
| encoder.encoder.layer.8.output.dense.bias | [768] | 768 |
|
152 |
+
| encoder.encoder.layer.8.output.LayerNorm.weight | [768] | 768 |
|
153 |
+
| encoder.encoder.layer.8.output.LayerNorm.bias | [768] | 768 |
|
154 |
+
| encoder.encoder.layer.9.attention.self.query.weight | [768, 768] | 589824 |
|
155 |
+
| encoder.encoder.layer.9.attention.self.query.bias | [768] | 768 |
|
156 |
+
| encoder.encoder.layer.9.attention.self.key.weight | [768, 768] | 589824 |
|
157 |
+
| encoder.encoder.layer.9.attention.self.key.bias | [768] | 768 |
|
158 |
+
| encoder.encoder.layer.9.attention.self.value.weight | [768, 768] | 589824 |
|
159 |
+
| encoder.encoder.layer.9.attention.self.value.bias | [768] | 768 |
|
160 |
+
| encoder.encoder.layer.9.attention.output.dense.weight | [768, 768] | 589824 |
|
161 |
+
| encoder.encoder.layer.9.attention.output.dense.bias | [768] | 768 |
|
162 |
+
| encoder.encoder.layer.9.attention.output.LayerNorm.weight | [768] | 768 |
|
163 |
+
| encoder.encoder.layer.9.attention.output.LayerNorm.bias | [768] | 768 |
|
164 |
+
| encoder.encoder.layer.9.intermediate.dense.weight | [3072, 768] | 2359296 |
|
165 |
+
| encoder.encoder.layer.9.intermediate.dense.bias | [3072] | 3072 |
|
166 |
+
| encoder.encoder.layer.9.output.dense.weight | [768, 3072] | 2359296 |
|
167 |
+
| encoder.encoder.layer.9.output.dense.bias | [768] | 768 |
|
168 |
+
| encoder.encoder.layer.9.output.LayerNorm.weight | [768] | 768 |
|
169 |
+
| encoder.encoder.layer.9.output.LayerNorm.bias | [768] | 768 |
|
170 |
+
| encoder.encoder.layer.10.attention.self.query.weight | [768, 768] | 589824 |
|
171 |
+
| encoder.encoder.layer.10.attention.self.query.bias | [768] | 768 |
|
172 |
+
| encoder.encoder.layer.10.attention.self.key.weight | [768, 768] | 589824 |
|
173 |
+
| encoder.encoder.layer.10.attention.self.key.bias | [768] | 768 |
|
174 |
+
| encoder.encoder.layer.10.attention.self.value.weight | [768, 768] | 589824 |
|
175 |
+
| encoder.encoder.layer.10.attention.self.value.bias | [768] | 768 |
|
176 |
+
| encoder.encoder.layer.10.attention.output.dense.weight | [768, 768] | 589824 |
|
177 |
+
| encoder.encoder.layer.10.attention.output.dense.bias | [768] | 768 |
|
178 |
+
| encoder.encoder.layer.10.attention.output.LayerNorm.weight | [768] | 768 |
|
179 |
+
| encoder.encoder.layer.10.attention.output.LayerNorm.bias | [768] | 768 |
|
180 |
+
| encoder.encoder.layer.10.intermediate.dense.weight | [3072, 768] | 2359296 |
|
181 |
+
| encoder.encoder.layer.10.intermediate.dense.bias | [3072] | 3072 |
|
182 |
+
| encoder.encoder.layer.10.output.dense.weight | [768, 3072] | 2359296 |
|
183 |
+
| encoder.encoder.layer.10.output.dense.bias | [768] | 768 |
|
184 |
+
| encoder.encoder.layer.10.output.LayerNorm.weight | [768] | 768 |
|
185 |
+
| encoder.encoder.layer.10.output.LayerNorm.bias | [768] | 768 |
|
186 |
+
| encoder.encoder.layer.11.attention.self.query.weight | [768, 768] | 589824 |
|
187 |
+
| encoder.encoder.layer.11.attention.self.query.bias | [768] | 768 |
|
188 |
+
| encoder.encoder.layer.11.attention.self.key.weight | [768, 768] | 589824 |
|
189 |
+
| encoder.encoder.layer.11.attention.self.key.bias | [768] | 768 |
|
190 |
+
| encoder.encoder.layer.11.attention.self.value.weight | [768, 768] | 589824 |
|
191 |
+
| encoder.encoder.layer.11.attention.self.value.bias | [768] | 768 |
|
192 |
+
| encoder.encoder.layer.11.attention.output.dense.weight | [768, 768] | 589824 |
|
193 |
+
| encoder.encoder.layer.11.attention.output.dense.bias | [768] | 768 |
|
194 |
+
| encoder.encoder.layer.11.attention.output.LayerNorm.weight | [768] | 768 |
|
195 |
+
| encoder.encoder.layer.11.attention.output.LayerNorm.bias | [768] | 768 |
|
196 |
+
| encoder.encoder.layer.11.intermediate.dense.weight | [3072, 768] | 2359296 |
|
197 |
+
| encoder.encoder.layer.11.intermediate.dense.bias | [3072] | 3072 |
|
198 |
+
| encoder.encoder.layer.11.output.dense.weight | [768, 3072] | 2359296 |
|
199 |
+
| encoder.encoder.layer.11.output.dense.bias | [768] | 768 |
|
200 |
+
| encoder.encoder.layer.11.output.LayerNorm.weight | [768] | 768 |
|
201 |
+
| encoder.encoder.layer.11.output.LayerNorm.bias | [768] | 768 |
|
202 |
+
| encoder.pooler.dense.weight | [768, 768] | 589824 |
|
203 |
+
| encoder.pooler.dense.bias | [768] | 768 |
|
204 |
+
+------------------------------------------------------------+--------------+----------+
|
205 |
+
02/17/2024 13:45:12 - INFO - __main__ - Training/evaluation parameters Namespace(agg_way='avg', aug_type_way='random_replace_type', code_length=256, codebase_file='dataset/Ruby/codebase.jsonl', config_name='DeepSoftwareAnalytics/CoCoSoDa', couninue_pre_train_data_files=['dataset/ruby/train.jsonl', 'dataset/java/train.jsonl'], data_aug_type='random_mask', data_flow_length=0, debug=False, device=device(type='cuda'), do_avg=False, do_continue_pre_trained=False, do_eval=False, do_fine_tune=False, do_ineer_loss=False, do_multi_lang_continue_pre_train=False, do_single_lang_continue_pre_train=False, do_test=True, do_train=True, do_whitening=False, do_zero_short=False, epoch=50, eval_batch_size=64, eval_data_file='dataset/Ruby/valid.jsonl', eval_frequency=100, fp16=False, gradient_accumulation_steps=1, hidden_size=768, lang='Ruby', learning_rate=2e-05, loaded_codebert_model_filename=None, loaded_model_filename=None, local_rank=-1, logging_steps=50, max_codeblock_num=10, max_grad_norm=1.0, max_steps=100, mlm_probability=0.1, mlp=False, moco_dim=768, moco_k=1024, moco_m=0.999, moco_t=0.07, moco_type='encoder_queue', model_name_or_path='DeepSoftwareAnalytics/CoCoSoDa', model_type='base', n_debug_samples=100, n_gpu=1, nl_length=128, num_train_epochs=5, num_warmup_steps=0, only_save_the_nl_code_vec=False, output_dir='./saved_models/fine_tune/Ruby', print_align_unif_loss=False, save_evaluation_reuslt=False, save_evaluation_reuslt_dir=None, save_steps=50, seed=123456, test_data_file='dataset/Ruby/test.jsonl', time_score=1, tokenizer_name='DeepSoftwareAnalytics/CoCoSoDa', train_batch_size=128, train_data_file='dataset/Ruby/train.jsonl', use_best_mrr_model=False, weight_decay=0.01)
|
206 |
+
Traceback (most recent call last):
|
207 |
+
File "run.py", line 1188, in <module>
|
208 |
+
main()
|
209 |
+
File "run.py", line 1154, in main
|
210 |
+
train(args, model, tokenizer, pool)
|
211 |
+
File "run.py", line 534, in train
|
212 |
+
train_dataset=TextDataset_unixcoder(tokenizer, args, args.train_data_file, pool)
|
213 |
+
File "run.py", line 393, in __init__
|
214 |
+
with open(file_path) as f:
|
215 |
+
FileNotFoundError: [Errno 2] No such file or directory: 'dataset/Ruby/train.jsonl'
|
saved_models/fine_tune/java/running.log
ADDED
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
02/17/2024 13:45:37 - INFO - __main__ - device: cuda, n_gpu: 1
|
2 |
+
02/17/2024 13:45:41 - INFO - __main__ - +------------------------------------------------------------+--------------+----------+
|
3 |
+
| Layer Name | Output Shape | Param # |
|
4 |
+
+------------------------------------------------------------+--------------+----------+
|
5 |
+
| encoder.embeddings.word_embeddings.weight | [51451, 768] | 39514368 |
|
6 |
+
| encoder.embeddings.position_embeddings.weight | [1026, 768] | 787968 |
|
7 |
+
| encoder.embeddings.token_type_embeddings.weight | [10, 768] | 7680 |
|
8 |
+
| encoder.embeddings.LayerNorm.weight | [768] | 768 |
|
9 |
+
| encoder.embeddings.LayerNorm.bias | [768] | 768 |
|
10 |
+
| encoder.encoder.layer.0.attention.self.query.weight | [768, 768] | 589824 |
|
11 |
+
| encoder.encoder.layer.0.attention.self.query.bias | [768] | 768 |
|
12 |
+
| encoder.encoder.layer.0.attention.self.key.weight | [768, 768] | 589824 |
|
13 |
+
| encoder.encoder.layer.0.attention.self.key.bias | [768] | 768 |
|
14 |
+
| encoder.encoder.layer.0.attention.self.value.weight | [768, 768] | 589824 |
|
15 |
+
| encoder.encoder.layer.0.attention.self.value.bias | [768] | 768 |
|
16 |
+
| encoder.encoder.layer.0.attention.output.dense.weight | [768, 768] | 589824 |
|
17 |
+
| encoder.encoder.layer.0.attention.output.dense.bias | [768] | 768 |
|
18 |
+
| encoder.encoder.layer.0.attention.output.LayerNorm.weight | [768] | 768 |
|
19 |
+
| encoder.encoder.layer.0.attention.output.LayerNorm.bias | [768] | 768 |
|
20 |
+
| encoder.encoder.layer.0.intermediate.dense.weight | [3072, 768] | 2359296 |
|
21 |
+
| encoder.encoder.layer.0.intermediate.dense.bias | [3072] | 3072 |
|
22 |
+
| encoder.encoder.layer.0.output.dense.weight | [768, 3072] | 2359296 |
|
23 |
+
| encoder.encoder.layer.0.output.dense.bias | [768] | 768 |
|
24 |
+
| encoder.encoder.layer.0.output.LayerNorm.weight | [768] | 768 |
|
25 |
+
| encoder.encoder.layer.0.output.LayerNorm.bias | [768] | 768 |
|
26 |
+
| encoder.encoder.layer.1.attention.self.query.weight | [768, 768] | 589824 |
|
27 |
+
| encoder.encoder.layer.1.attention.self.query.bias | [768] | 768 |
|
28 |
+
| encoder.encoder.layer.1.attention.self.key.weight | [768, 768] | 589824 |
|
29 |
+
| encoder.encoder.layer.1.attention.self.key.bias | [768] | 768 |
|
30 |
+
| encoder.encoder.layer.1.attention.self.value.weight | [768, 768] | 589824 |
|
31 |
+
| encoder.encoder.layer.1.attention.self.value.bias | [768] | 768 |
|
32 |
+
| encoder.encoder.layer.1.attention.output.dense.weight | [768, 768] | 589824 |
|
33 |
+
| encoder.encoder.layer.1.attention.output.dense.bias | [768] | 768 |
|
34 |
+
| encoder.encoder.layer.1.attention.output.LayerNorm.weight | [768] | 768 |
|
35 |
+
| encoder.encoder.layer.1.attention.output.LayerNorm.bias | [768] | 768 |
|
36 |
+
| encoder.encoder.layer.1.intermediate.dense.weight | [3072, 768] | 2359296 |
|
37 |
+
| encoder.encoder.layer.1.intermediate.dense.bias | [3072] | 3072 |
|
38 |
+
| encoder.encoder.layer.1.output.dense.weight | [768, 3072] | 2359296 |
|
39 |
+
| encoder.encoder.layer.1.output.dense.bias | [768] | 768 |
|
40 |
+
| encoder.encoder.layer.1.output.LayerNorm.weight | [768] | 768 |
|
41 |
+
| encoder.encoder.layer.1.output.LayerNorm.bias | [768] | 768 |
|
42 |
+
| encoder.encoder.layer.2.attention.self.query.weight | [768, 768] | 589824 |
|
43 |
+
| encoder.encoder.layer.2.attention.self.query.bias | [768] | 768 |
|
44 |
+
| encoder.encoder.layer.2.attention.self.key.weight | [768, 768] | 589824 |
|
45 |
+
| encoder.encoder.layer.2.attention.self.key.bias | [768] | 768 |
|
46 |
+
| encoder.encoder.layer.2.attention.self.value.weight | [768, 768] | 589824 |
|
47 |
+
| encoder.encoder.layer.2.attention.self.value.bias | [768] | 768 |
|
48 |
+
| encoder.encoder.layer.2.attention.output.dense.weight | [768, 768] | 589824 |
|
49 |
+
| encoder.encoder.layer.2.attention.output.dense.bias | [768] | 768 |
|
50 |
+
| encoder.encoder.layer.2.attention.output.LayerNorm.weight | [768] | 768 |
|
51 |
+
| encoder.encoder.layer.2.attention.output.LayerNorm.bias | [768] | 768 |
|
52 |
+
| encoder.encoder.layer.2.intermediate.dense.weight | [3072, 768] | 2359296 |
|
53 |
+
| encoder.encoder.layer.2.intermediate.dense.bias | [3072] | 3072 |
|
54 |
+
| encoder.encoder.layer.2.output.dense.weight | [768, 3072] | 2359296 |
|
55 |
+
| encoder.encoder.layer.2.output.dense.bias | [768] | 768 |
|
56 |
+
| encoder.encoder.layer.2.output.LayerNorm.weight | [768] | 768 |
|
57 |
+
| encoder.encoder.layer.2.output.LayerNorm.bias | [768] | 768 |
|
58 |
+
| encoder.encoder.layer.3.attention.self.query.weight | [768, 768] | 589824 |
|
59 |
+
| encoder.encoder.layer.3.attention.self.query.bias | [768] | 768 |
|
60 |
+
| encoder.encoder.layer.3.attention.self.key.weight | [768, 768] | 589824 |
|
61 |
+
| encoder.encoder.layer.3.attention.self.key.bias | [768] | 768 |
|
62 |
+
| encoder.encoder.layer.3.attention.self.value.weight | [768, 768] | 589824 |
|
63 |
+
| encoder.encoder.layer.3.attention.self.value.bias | [768] | 768 |
|
64 |
+
| encoder.encoder.layer.3.attention.output.dense.weight | [768, 768] | 589824 |
|
65 |
+
| encoder.encoder.layer.3.attention.output.dense.bias | [768] | 768 |
|
66 |
+
| encoder.encoder.layer.3.attention.output.LayerNorm.weight | [768] | 768 |
|
67 |
+
| encoder.encoder.layer.3.attention.output.LayerNorm.bias | [768] | 768 |
|
68 |
+
| encoder.encoder.layer.3.intermediate.dense.weight | [3072, 768] | 2359296 |
|
69 |
+
| encoder.encoder.layer.3.intermediate.dense.bias | [3072] | 3072 |
|
70 |
+
| encoder.encoder.layer.3.output.dense.weight | [768, 3072] | 2359296 |
|
71 |
+
| encoder.encoder.layer.3.output.dense.bias | [768] | 768 |
|
72 |
+
| encoder.encoder.layer.3.output.LayerNorm.weight | [768] | 768 |
|
73 |
+
| encoder.encoder.layer.3.output.LayerNorm.bias | [768] | 768 |
|
74 |
+
| encoder.encoder.layer.4.attention.self.query.weight | [768, 768] | 589824 |
|
75 |
+
| encoder.encoder.layer.4.attention.self.query.bias | [768] | 768 |
|
76 |
+
| encoder.encoder.layer.4.attention.self.key.weight | [768, 768] | 589824 |
|
77 |
+
| encoder.encoder.layer.4.attention.self.key.bias | [768] | 768 |
|
78 |
+
| encoder.encoder.layer.4.attention.self.value.weight | [768, 768] | 589824 |
|
79 |
+
| encoder.encoder.layer.4.attention.self.value.bias | [768] | 768 |
|
80 |
+
| encoder.encoder.layer.4.attention.output.dense.weight | [768, 768] | 589824 |
|
81 |
+
| encoder.encoder.layer.4.attention.output.dense.bias | [768] | 768 |
|
82 |
+
| encoder.encoder.layer.4.attention.output.LayerNorm.weight | [768] | 768 |
|
83 |
+
| encoder.encoder.layer.4.attention.output.LayerNorm.bias | [768] | 768 |
|
84 |
+
| encoder.encoder.layer.4.intermediate.dense.weight | [3072, 768] | 2359296 |
|
85 |
+
| encoder.encoder.layer.4.intermediate.dense.bias | [3072] | 3072 |
|
86 |
+
| encoder.encoder.layer.4.output.dense.weight | [768, 3072] | 2359296 |
|
87 |
+
| encoder.encoder.layer.4.output.dense.bias | [768] | 768 |
|
88 |
+
| encoder.encoder.layer.4.output.LayerNorm.weight | [768] | 768 |
|
89 |
+
| encoder.encoder.layer.4.output.LayerNorm.bias | [768] | 768 |
|
90 |
+
| encoder.encoder.layer.5.attention.self.query.weight | [768, 768] | 589824 |
|
91 |
+
| encoder.encoder.layer.5.attention.self.query.bias | [768] | 768 |
|
92 |
+
| encoder.encoder.layer.5.attention.self.key.weight | [768, 768] | 589824 |
|
93 |
+
| encoder.encoder.layer.5.attention.self.key.bias | [768] | 768 |
|
94 |
+
| encoder.encoder.layer.5.attention.self.value.weight | [768, 768] | 589824 |
|
95 |
+
| encoder.encoder.layer.5.attention.self.value.bias | [768] | 768 |
|
96 |
+
| encoder.encoder.layer.5.attention.output.dense.weight | [768, 768] | 589824 |
|
97 |
+
| encoder.encoder.layer.5.attention.output.dense.bias | [768] | 768 |
|
98 |
+
| encoder.encoder.layer.5.attention.output.LayerNorm.weight | [768] | 768 |
|
99 |
+
| encoder.encoder.layer.5.attention.output.LayerNorm.bias | [768] | 768 |
|
100 |
+
| encoder.encoder.layer.5.intermediate.dense.weight | [3072, 768] | 2359296 |
|
101 |
+
| encoder.encoder.layer.5.intermediate.dense.bias | [3072] | 3072 |
|
102 |
+
| encoder.encoder.layer.5.output.dense.weight | [768, 3072] | 2359296 |
|
103 |
+
| encoder.encoder.layer.5.output.dense.bias | [768] | 768 |
|
104 |
+
| encoder.encoder.layer.5.output.LayerNorm.weight | [768] | 768 |
|
105 |
+
| encoder.encoder.layer.5.output.LayerNorm.bias | [768] | 768 |
|
106 |
+
| encoder.encoder.layer.6.attention.self.query.weight | [768, 768] | 589824 |
|
107 |
+
| encoder.encoder.layer.6.attention.self.query.bias | [768] | 768 |
|
108 |
+
| encoder.encoder.layer.6.attention.self.key.weight | [768, 768] | 589824 |
|
109 |
+
| encoder.encoder.layer.6.attention.self.key.bias | [768] | 768 |
|
110 |
+
| encoder.encoder.layer.6.attention.self.value.weight | [768, 768] | 589824 |
|
111 |
+
| encoder.encoder.layer.6.attention.self.value.bias | [768] | 768 |
|
112 |
+
| encoder.encoder.layer.6.attention.output.dense.weight | [768, 768] | 589824 |
|
113 |
+
| encoder.encoder.layer.6.attention.output.dense.bias | [768] | 768 |
|
114 |
+
| encoder.encoder.layer.6.attention.output.LayerNorm.weight | [768] | 768 |
|
115 |
+
| encoder.encoder.layer.6.attention.output.LayerNorm.bias | [768] | 768 |
|
116 |
+
| encoder.encoder.layer.6.intermediate.dense.weight | [3072, 768] | 2359296 |
|
117 |
+
| encoder.encoder.layer.6.intermediate.dense.bias | [3072] | 3072 |
|
118 |
+
| encoder.encoder.layer.6.output.dense.weight | [768, 3072] | 2359296 |
|
119 |
+
| encoder.encoder.layer.6.output.dense.bias | [768] | 768 |
|
120 |
+
| encoder.encoder.layer.6.output.LayerNorm.weight | [768] | 768 |
|
121 |
+
| encoder.encoder.layer.6.output.LayerNorm.bias | [768] | 768 |
|
122 |
+
| encoder.encoder.layer.7.attention.self.query.weight | [768, 768] | 589824 |
|
123 |
+
| encoder.encoder.layer.7.attention.self.query.bias | [768] | 768 |
|
124 |
+
| encoder.encoder.layer.7.attention.self.key.weight | [768, 768] | 589824 |
|
125 |
+
| encoder.encoder.layer.7.attention.self.key.bias | [768] | 768 |
|
126 |
+
| encoder.encoder.layer.7.attention.self.value.weight | [768, 768] | 589824 |
|
127 |
+
| encoder.encoder.layer.7.attention.self.value.bias | [768] | 768 |
|
128 |
+
| encoder.encoder.layer.7.attention.output.dense.weight | [768, 768] | 589824 |
|
129 |
+
| encoder.encoder.layer.7.attention.output.dense.bias | [768] | 768 |
|
130 |
+
| encoder.encoder.layer.7.attention.output.LayerNorm.weight | [768] | 768 |
|
131 |
+
| encoder.encoder.layer.7.attention.output.LayerNorm.bias | [768] | 768 |
|
132 |
+
| encoder.encoder.layer.7.intermediate.dense.weight | [3072, 768] | 2359296 |
|
133 |
+
| encoder.encoder.layer.7.intermediate.dense.bias | [3072] | 3072 |
|
134 |
+
| encoder.encoder.layer.7.output.dense.weight | [768, 3072] | 2359296 |
|
135 |
+
| encoder.encoder.layer.7.output.dense.bias | [768] | 768 |
|
136 |
+
| encoder.encoder.layer.7.output.LayerNorm.weight | [768] | 768 |
|
137 |
+
| encoder.encoder.layer.7.output.LayerNorm.bias | [768] | 768 |
|
138 |
+
| encoder.encoder.layer.8.attention.self.query.weight | [768, 768] | 589824 |
|
139 |
+
| encoder.encoder.layer.8.attention.self.query.bias | [768] | 768 |
|
140 |
+
| encoder.encoder.layer.8.attention.self.key.weight | [768, 768] | 589824 |
|
141 |
+
| encoder.encoder.layer.8.attention.self.key.bias | [768] | 768 |
|
142 |
+
| encoder.encoder.layer.8.attention.self.value.weight | [768, 768] | 589824 |
|
143 |
+
| encoder.encoder.layer.8.attention.self.value.bias | [768] | 768 |
|
144 |
+
| encoder.encoder.layer.8.attention.output.dense.weight | [768, 768] | 589824 |
|
145 |
+
| encoder.encoder.layer.8.attention.output.dense.bias | [768] | 768 |
|
146 |
+
| encoder.encoder.layer.8.attention.output.LayerNorm.weight | [768] | 768 |
|
147 |
+
| encoder.encoder.layer.8.attention.output.LayerNorm.bias | [768] | 768 |
|
148 |
+
| encoder.encoder.layer.8.intermediate.dense.weight | [3072, 768] | 2359296 |
|
149 |
+
| encoder.encoder.layer.8.intermediate.dense.bias | [3072] | 3072 |
|
150 |
+
| encoder.encoder.layer.8.output.dense.weight | [768, 3072] | 2359296 |
|
151 |
+
| encoder.encoder.layer.8.output.dense.bias | [768] | 768 |
|
152 |
+
| encoder.encoder.layer.8.output.LayerNorm.weight | [768] | 768 |
|
153 |
+
| encoder.encoder.layer.8.output.LayerNorm.bias | [768] | 768 |
|
154 |
+
| encoder.encoder.layer.9.attention.self.query.weight | [768, 768] | 589824 |
|
155 |
+
| encoder.encoder.layer.9.attention.self.query.bias | [768] | 768 |
|
156 |
+
| encoder.encoder.layer.9.attention.self.key.weight | [768, 768] | 589824 |
|
157 |
+
| encoder.encoder.layer.9.attention.self.key.bias | [768] | 768 |
|
158 |
+
| encoder.encoder.layer.9.attention.self.value.weight | [768, 768] | 589824 |
|
159 |
+
| encoder.encoder.layer.9.attention.self.value.bias | [768] | 768 |
|
160 |
+
| encoder.encoder.layer.9.attention.output.dense.weight | [768, 768] | 589824 |
|
161 |
+
| encoder.encoder.layer.9.attention.output.dense.bias | [768] | 768 |
|
162 |
+
| encoder.encoder.layer.9.attention.output.LayerNorm.weight | [768] | 768 |
|
163 |
+
| encoder.encoder.layer.9.attention.output.LayerNorm.bias | [768] | 768 |
|
164 |
+
| encoder.encoder.layer.9.intermediate.dense.weight | [3072, 768] | 2359296 |
|
165 |
+
| encoder.encoder.layer.9.intermediate.dense.bias | [3072] | 3072 |
|
166 |
+
| encoder.encoder.layer.9.output.dense.weight | [768, 3072] | 2359296 |
|
167 |
+
| encoder.encoder.layer.9.output.dense.bias | [768] | 768 |
|
168 |
+
| encoder.encoder.layer.9.output.LayerNorm.weight | [768] | 768 |
|
169 |
+
| encoder.encoder.layer.9.output.LayerNorm.bias | [768] | 768 |
|
170 |
+
| encoder.encoder.layer.10.attention.self.query.weight | [768, 768] | 589824 |
|
171 |
+
| encoder.encoder.layer.10.attention.self.query.bias | [768] | 768 |
|
172 |
+
| encoder.encoder.layer.10.attention.self.key.weight | [768, 768] | 589824 |
|
173 |
+
| encoder.encoder.layer.10.attention.self.key.bias | [768] | 768 |
|
174 |
+
| encoder.encoder.layer.10.attention.self.value.weight | [768, 768] | 589824 |
|
175 |
+
| encoder.encoder.layer.10.attention.self.value.bias | [768] | 768 |
|
176 |
+
| encoder.encoder.layer.10.attention.output.dense.weight | [768, 768] | 589824 |
|
177 |
+
| encoder.encoder.layer.10.attention.output.dense.bias | [768] | 768 |
|
178 |
+
| encoder.encoder.layer.10.attention.output.LayerNorm.weight | [768] | 768 |
|
179 |
+
| encoder.encoder.layer.10.attention.output.LayerNorm.bias | [768] | 768 |
|
180 |
+
| encoder.encoder.layer.10.intermediate.dense.weight | [3072, 768] | 2359296 |
|
181 |
+
| encoder.encoder.layer.10.intermediate.dense.bias | [3072] | 3072 |
|
182 |
+
| encoder.encoder.layer.10.output.dense.weight | [768, 3072] | 2359296 |
|
183 |
+
| encoder.encoder.layer.10.output.dense.bias | [768] | 768 |
|
184 |
+
| encoder.encoder.layer.10.output.LayerNorm.weight | [768] | 768 |
|
185 |
+
| encoder.encoder.layer.10.output.LayerNorm.bias | [768] | 768 |
|
186 |
+
| encoder.encoder.layer.11.attention.self.query.weight | [768, 768] | 589824 |
|
187 |
+
| encoder.encoder.layer.11.attention.self.query.bias | [768] | 768 |
|
188 |
+
| encoder.encoder.layer.11.attention.self.key.weight | [768, 768] | 589824 |
|
189 |
+
| encoder.encoder.layer.11.attention.self.key.bias | [768] | 768 |
|
190 |
+
| encoder.encoder.layer.11.attention.self.value.weight | [768, 768] | 589824 |
|
191 |
+
| encoder.encoder.layer.11.attention.self.value.bias | [768] | 768 |
|
192 |
+
| encoder.encoder.layer.11.attention.output.dense.weight | [768, 768] | 589824 |
|
193 |
+
| encoder.encoder.layer.11.attention.output.dense.bias | [768] | 768 |
|
194 |
+
| encoder.encoder.layer.11.attention.output.LayerNorm.weight | [768] | 768 |
|
195 |
+
| encoder.encoder.layer.11.attention.output.LayerNorm.bias | [768] | 768 |
|
196 |
+
| encoder.encoder.layer.11.intermediate.dense.weight | [3072, 768] | 2359296 |
|
197 |
+
| encoder.encoder.layer.11.intermediate.dense.bias | [3072] | 3072 |
|
198 |
+
| encoder.encoder.layer.11.output.dense.weight | [768, 3072] | 2359296 |
|
199 |
+
| encoder.encoder.layer.11.output.dense.bias | [768] | 768 |
|
200 |
+
| encoder.encoder.layer.11.output.LayerNorm.weight | [768] | 768 |
|
201 |
+
| encoder.encoder.layer.11.output.LayerNorm.bias | [768] | 768 |
|
202 |
+
| encoder.pooler.dense.weight | [768, 768] | 589824 |
|
203 |
+
| encoder.pooler.dense.bias | [768] | 768 |
|
204 |
+
+------------------------------------------------------------+--------------+----------+
|
205 |
+
02/17/2024 13:45:41 - INFO - __main__ - Training/evaluation parameters Namespace(agg_way='avg', aug_type_way='random_replace_type', code_length=256, codebase_file='dataset/java/codebase.jsonl', config_name='DeepSoftwareAnalytics/CoCoSoDa', couninue_pre_train_data_files=['dataset/ruby/train.jsonl', 'dataset/java/train.jsonl'], data_aug_type='random_mask', data_flow_length=0, debug=False, device=device(type='cuda'), do_avg=False, do_continue_pre_trained=False, do_eval=False, do_fine_tune=False, do_ineer_loss=False, do_multi_lang_continue_pre_train=False, do_single_lang_continue_pre_train=False, do_test=True, do_train=True, do_whitening=False, do_zero_short=False, epoch=50, eval_batch_size=64, eval_data_file='dataset/java/valid.jsonl', eval_frequency=100, fp16=False, gradient_accumulation_steps=1, hidden_size=768, lang='java', learning_rate=2e-05, loaded_codebert_model_filename=None, loaded_model_filename=None, local_rank=-1, logging_steps=50, max_codeblock_num=10, max_grad_norm=1.0, max_steps=100, mlm_probability=0.1, mlp=False, moco_dim=768, moco_k=1024, moco_m=0.999, moco_t=0.07, moco_type='encoder_queue', model_name_or_path='DeepSoftwareAnalytics/CoCoSoDa', model_type='base', n_debug_samples=100, n_gpu=1, nl_length=128, num_train_epochs=5, num_warmup_steps=0, only_save_the_nl_code_vec=False, output_dir='./saved_models/fine_tune/java', print_align_unif_loss=False, save_evaluation_reuslt=False, save_evaluation_reuslt_dir=None, save_steps=50, seed=123456, test_data_file='dataset/java/test.jsonl', time_score=1, tokenizer_name='DeepSoftwareAnalytics/CoCoSoDa', train_batch_size=128, train_data_file='dataset/java/train.jsonl', use_best_mrr_model=False, weight_decay=0.01)
|
206 |
+
02/17/2024 13:48:46 - INFO - __main__ - *** Example ***
|
207 |
+
02/17/2024 13:48:46 - INFO - __main__ - idx: 0
|
208 |
+
02/17/2024 13:48:46 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', '@', '_Override', '_public', '_Image', 'Source', '_apply', '_(', '_Image', 'Source', '_input', '_)', '_{', '_final', '_int', '_[', '_]', '_[', '_]', '_pixel', 'Matrix', '_=', '_new', '_int', '_[', '_3', '_]', '_[', '_3', '_]', '_;', '_int', '_w', '_=', '_input', '_.', '_getWidth', '_(', '_)', '_;', '_int', '_h', '_=', '_input', '_.', '_getHeight', '_(', '_)', '_;', '_int', '_[', '_]', '_[', '_]', '_output', '_=', '_new', '_int', '_[', '_h', '_]', '_[', '_w', '_]', '_;', '_for', '_(', '_int', '_j', '_=', '_1', '_;', '_j', '_<', '_h', '_-', '_1', '_;', '_j', '_++', '_)', '_{', '_for', '_(', '_int', '_i', '_=', '_1', '_;', '_i', '_<', '_w', '_-', '_1', '_;', '_i', '_++', '_)', '_{', '_pixel', 'Matrix', '_[', '_0', '_]', '_[', '_0', '_]', '_=', '_input', '_.', '_get', 'R', '_(', '_i', '_-', '_1', '_,', '_j', '_-', '_1', '_)', '_;', '_pixel', 'Matrix', '_[', '_0', '_]', '_[', '_1', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_-', '_1', '_,', '_j', '_)', '_;', '_pixel', 'Matrix', '_[', '_0', '_]', '_[', '_2', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_-', '_1', '_,', '_j', '_+', '_1', '_)', '_;', '_pixel', 'Matrix', '_[', '_1', '_]', '_[', '_0', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_,', '_j', '_-', '_1', '_)', '_;', '_pixel', 'Matrix', '_[', '_1', '_]', '_[', '_2', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_,', '_j', '_+', '_1', '_)', '_;', '_pixel', 'Matrix', '_[', '_2', '_]', '_[', '_0', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_+', '_1', '_,', '_j', '_-', '_1', '_)', '_;', '_pixel', 'Matrix', '_[', '_2', '_]', '_[', '_1', '_]', '_=', '_input', '_.', '_get', 'RGB', '_(', '_i', '_+', '_1', '_,', '_j', '_)', '_;', '_pixel', '</s>']
|
209 |
+
02/17/2024 13:48:46 - INFO - __main__ - code_ids: 0 6 2 150 19505 1240 6085 1768 5230 400 6085 1768 1586 743 399 1920 554 626 2406 626 2406 5578 3679 385 579 554 626 995 2406 626 995 2406 2476 554 477 385 1586 746 32671 400 743 2476 554 566 385 1586 746 32720 400 743 2476 554 626 2406 626 2406 1721 385 579 554 626 566 2406 626 477 2406 2476 563 400 554 913 385 524 2476 913 517 566 581 524 2476 913 1932 743 399 563 400 554 548 385 524 2476 548 517 477 581 524 2476 548 1932 743 399 5578 3679 626 461 2406 626 461 2406 385 1586 746 744 168 400 548 581 524 2019 913 581 524 743 2476 5578 3679 626 461 2406 626 524 2406 385 1586 746 744 7664 400 548 581 524 2019 913 743 2476 5578 3679 626 461 2406 626 688 2406 385 1586 746 744 7664 400 548 581 524 2019 913 513 524 743 2476 5578 3679 626 524 2406 626 461 2406 385 1586 746 744 7664 400 548 2019 913 581 524 743 2476 5578 3679 626 524 2406 626 688 2406 385 1586 746 744 7664 400 548 2019 913 513 524 743 2476 5578 3679 626 688 2406 626 461 2406 385 1586 746 744 7664 400 548 513 524 2019 913 581 524 743 2476 5578 3679 626 688 2406 626 524 2406 385 1586 746 744 7664 400 548 513 524 2019 913 743 2476 5578 2
|
210 |
+
02/17/2024 13:48:46 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Expect', 's', '_a', '_height', '_mat', '_as', '_input', '</s>']
|
211 |
+
02/17/2024 13:48:46 - INFO - __main__ - nl_ids: 0 6 2 7871 201 434 3082 5772 880 1586 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
212 |
+
02/17/2024 13:48:46 - INFO - __main__ - *** Example ***
|
213 |
+
02/17/2024 13:48:46 - INFO - __main__ - idx: 1
|
214 |
+
02/17/2024 13:48:46 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'public', '_<', '_L', 'extends', 'Listener', '_>', '_void', '_pop', 'Event', '_(', '_Event', '_<', '_?', '_,', '_L', '_>', '_expected', '_)', '_{', '_synchronized', '_(', '_this', '_.', '_stack', '_)', '_{', '_final', '_Event', '_<', '_?', '_,', '_?', '_>', '_actual', '_=', '_this', '_.', '_stack', '_.', '_pop', '_(', '_)', '_;', '_if', '_(', '_actual', '_!=', '_expected', '_)', '_{', '_throw', '_new', '_IllegalStateException', '_(', '_String', '_.', '_format', '_(', '"', 'Un', 'balanced', '_pop', ':', '_expected', "_'%", 's', "'", '_but', '_encountered', "_'%", 's', "'", '"', ',', '_expected', '_.', '_get', 'Listener', 'Class', '_(', '_)', '_,', '_actual', '_)', '_)', '_;', '_}', '_}', '_}', '</s>']
|
215 |
+
02/17/2024 13:48:46 - INFO - __main__ - code_ids: 0 6 2 653 517 747 13125 2486 711 723 5012 1089 400 3916 517 999 2019 747 711 2048 743 399 9401 400 547 746 3325 743 399 1920 3916 517 999 2019 999 711 3780 385 547 746 3325 746 5012 400 743 2476 462 400 3780 620 2048 743 399 1185 579 16219 400 1167 746 2021 400 120 965 37707 5012 144 2048 3421 201 125 2107 17038 3421 201 125 120 130 2048 746 744 2486 1128 400 743 2019 3780 743 743 2476 425 425 425 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
216 |
+
02/17/2024 13:48:46 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'P', 'ops', '_the', '_top', '_event', '_off', '_the', '_current', '_event', '_stack', '_.', '_This', '_action', '_has', '_to', '_be', '_performed', '_immediately', '_after', '_the', '_event', '_has', '_been', '_dispatched', '_to', '_all', '_listeners', '_.', '</s>']
|
217 |
+
02/17/2024 13:48:46 - INFO - __main__ - nl_ids: 0 6 2 166 2489 448 3194 1488 3413 448 1434 1488 3325 746 1600 2657 1559 508 661 13181 10086 2493 448 1488 1559 3022 43340 508 1345 11839 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
218 |
+
02/17/2024 13:48:46 - INFO - __main__ - *** Example ***
|
219 |
+
02/17/2024 13:48:46 - INFO - __main__ - idx: 2
|
220 |
+
02/17/2024 13:48:46 - INFO - __main__ - code_tokens: ['<s>', '<encoder-only>', '</s>', 'protected', '_void', '_modify', '_(', '_Transaction', '_t', '_)', '_{', '_try', '_{', '_this', '_.', '_lock', '_.', '_write', 'Lock', '_(', '_)', '_.', '_lock', '_(', '_)', '_;', '_t', '_.', '_perform', '_(', '_)', '_;', '_}', '_finally', '_{', '_this', '_.', '_lock', '_.', '_write', 'Lock', '_(', '_)', '_.', '_unlock', '_(', '_)', '_;', '_}', '_}', '</s>']
|
221 |
+
02/17/2024 13:48:46 - INFO - __main__ - code_ids: 0 6 2 1933 723 8660 400 13081 422 743 399 1568 399 547 746 3505 746 2250 2896 400 743 746 3505 400 743 2476 422 746 4729 400 743 2476 425 6110 399 547 746 3505 746 2250 2896 400 743 746 14552 400 743 2476 425 425 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
222 |
+
02/17/2024 13:48:46 - INFO - __main__ - nl_tokens: ['<s>', '<encoder-only>', '</s>', 'Executes', '_the', '_given', '_transaction', '_within', '_the', '_con', 'text', 'of', '_a', '_write', '_lock', '_.', '</s>']
|
223 |
+
02/17/2024 13:48:46 - INFO - __main__ - nl_ids: 0 6 2 40551 448 2076 4993 5289 448 549 625 757 434 2250 3505 746 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
|
224 |
+
02/17/2024 13:48:46 - INFO - __main__ - ***** Running training *****
|
225 |
+
02/17/2024 13:48:46 - INFO - __main__ - Num examples = 164923
|
226 |
+
02/17/2024 13:48:46 - INFO - __main__ - Num Epochs = 5
|
227 |
+
02/17/2024 13:48:46 - INFO - __main__ - Num quene = 1024
|
228 |
+
02/17/2024 13:48:46 - INFO - __main__ - Instantaneous batch size per GPU = 128
|
229 |
+
02/17/2024 13:48:46 - INFO - __main__ - Total train batch size = 128
|
230 |
+
02/17/2024 13:48:46 - INFO - __main__ - Total optimization steps = 6440
|
231 |
+
Traceback (most recent call last):
|
232 |
+
File "run.py", line 1188, in <module>
|
233 |
+
main()
|
234 |
+
File "run.py", line 1154, in main
|
235 |
+
train(args, model, tokenizer, pool)
|
236 |
+
File "run.py", line 585, in train
|
237 |
+
code_vec = model(code_inputs=code_inputs)
|
238 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
239 |
+
return forward_call(*input, **kwargs)
|
240 |
+
File "/home/yiming/cocosoda/CoCoSoDa/model.py", line 40, in forward
|
241 |
+
outputs = self.encoder(code_inputs,attention_mask=code_inputs.ne(1))[0] #[bs, seq_len, dim]
|
242 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1120, in _call_impl
|
243 |
+
result = forward_call(*input, **kwargs)
|
244 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/transformers/models/roberta/modeling_roberta.py", line 860, in forward
|
245 |
+
return_dict=return_dict,
|
246 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
247 |
+
return forward_call(*input, **kwargs)
|
248 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/transformers/models/roberta/modeling_roberta.py", line 531, in forward
|
249 |
+
output_attentions,
|
250 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
251 |
+
return forward_call(*input, **kwargs)
|
252 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/transformers/models/roberta/modeling_roberta.py", line 415, in forward
|
253 |
+
past_key_value=self_attn_past_key_value,
|
254 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
255 |
+
return forward_call(*input, **kwargs)
|
256 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/transformers/models/roberta/modeling_roberta.py", line 344, in forward
|
257 |
+
output_attentions,
|
258 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
259 |
+
return forward_call(*input, **kwargs)
|
260 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/transformers/models/roberta/modeling_roberta.py", line 267, in forward
|
261 |
+
attention_probs = self.dropout(attention_probs)
|
262 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
|
263 |
+
return forward_call(*input, **kwargs)
|
264 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/modules/dropout.py", line 58, in forward
|
265 |
+
return F.dropout(input, self.p, self.training, self.inplace)
|
266 |
+
File "/home/yiming/anaconda3/envs/CoCoSoDa/lib/python3.6/site-packages/torch/nn/functional.py", line 1169, in dropout
|
267 |
+
return _VF.dropout_(input, p, training) if inplace else _VF.dropout(input, p, training)
|
268 |
+
RuntimeError: CUDA out of memory. Tried to allocate 384.00 MiB (GPU 0; 14.75 GiB total capacity; 12.96 GiB already allocated; 173.94 MiB free; 13.02 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
|
saved_models/fine_tune/ruby/0/model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8e9079fe0c63cf5b89b754bbcbd82424a11bf0bd9e5af6f5714c6d5a4c5a42d
|
3 |
+
size 1524384769
|
saved_models/fine_tune/ruby/1/all_code_vec.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d853b87902ece32e8427671590a29d8e69153f352fdbbcb89c9b2fa09df03f1e
|
3 |
+
size 76575872
|
saved_models/fine_tune/ruby/1/all_nl_vec.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:321dfa145a7cfc4e5176980a2354cdafea7a48043288a88c65c831b15462dbfa
|
3 |
+
size 76575872
|
saved_models/fine_tune/ruby/1/model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4b64b0755d42a48072d1390d69a1d0979aaa779eee94bc63444511ddd59b793
|
3 |
+
size 1524384769
|
saved_models/fine_tune/ruby/1/test_all_code_vec.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b907f0004db84850c59214fc63e8b59eddfab8127fb435712632eaec0ec07fb0
|
3 |
+
size 3873920
|
saved_models/fine_tune/ruby/1/test_all_nl_vec.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa8eecb652e9879e0cd5bef34fb8ce16adc43b88cb097f00a39ccb538861308e
|
3 |
+
size 3873920
|
saved_models/fine_tune/ruby/2/all_code_vec.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2c34ce22cef54b838cb15be43f74ce72cba04ff7c3523ec16923db2e3cc479f
|
3 |
+
size 76575872
|
saved_models/fine_tune/ruby/2/all_nl_vec.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:470bce39b439f6151ca0acabb6c5b3754f92a9f4cdd63bf29cabd88fac57e26f
|
3 |
+
size 76575872
|
saved_models/fine_tune/ruby/2/model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6fdc86d2b72e082fc9ab35102f060ae270f952b2755d26ff492a0ec8fc5b19ee
|
3 |
+
size 1524384769
|
saved_models/fine_tune/ruby/2/test_all_code_vec.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d5eea680c7a2423d1e7e482e5850a935fd6c977c94c595992e766beba1a42a5
|
3 |
+
size 3873920
|
saved_models/fine_tune/ruby/2/test_all_nl_vec.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a30742e7e82e6d0ca1533773cf5aa747b346ca102f8526e8fa5170f56e88657a
|
3 |
+
size 3873920
|
saved_models/fine_tune/ruby/3/all_code_vec.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70f73384f0d8a01e3c6208c534d295d2c2cd1af98dee5cd1fb7dc046fde88567
|
3 |
+
size 76575872
|
saved_models/fine_tune/ruby/3/all_nl_vec.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c8e440149458ad89a9ce02f62d2d050b51f4d7d23f1d044f53bdde25c541526
|
3 |
+
size 76575872
|
saved_models/fine_tune/ruby/3/model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8bce2703c09f8b5c78aa2a0c31b92f80a0fca266dbab3101927552f3998c5794
|
3 |
+
size 1524384769
|
saved_models/fine_tune/ruby/3/test_all_code_vec.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9dd02d38f7aad13eea34fcdc02d24857437a5a48e5f813ef7659717edc03961
|
3 |
+
size 3873920
|
saved_models/fine_tune/ruby/3/test_all_nl_vec.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d367df01e5f400124be2aefbf77eb39bc5dc575b30db1f7593bad1f6949d742
|
3 |
+
size 3873920
|
saved_models/fine_tune/ruby/4/model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e507b62594fe1d7cebbabf653d559acb9e9a72f9d31b57ea53721bb52e26d228
|
3 |
+
size 1524384769
|
saved_models/fine_tune/ruby/checkpoint-best-mrr/model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e507b62594fe1d7cebbabf653d559acb9e9a72f9d31b57ea53721bb52e26d228
|
3 |
+
size 1524384769
|
saved_models/fine_tune/ruby/docstring_list.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
saved_models/fine_tune/ruby/result.jsonl
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"R@1": 0.643, "R@5": 0.875, "R@10": 0.923, "eval_mrr": 0.746}
|
saved_models/fine_tune/ruby/running.log
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
02/20/2024 16:37:50 - INFO - __main__ - device: cuda, n_gpu: 2
|
2 |
+
24927
|
3 |
+
24927
|
4 |
+
1261
|
5 |
+
1261
|