saridormi committed on
Commit
1fe53a0
1 Parent(s): b183209

Add model and tokenizer files

Browse files
added_tokens.json ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<add>": 32101,
3
+ "<c>": 32213,
4
+ "<c_plus_plus>": 32215,
5
+ "<c_sharp>": 32214,
6
+ "<del>": 32102,
7
+ "<e0>": 32204,
8
+ "<e10>": 32194,
9
+ "<e11>": 32193,
10
+ "<e12>": 32192,
11
+ "<e13>": 32191,
12
+ "<e14>": 32190,
13
+ "<e15>": 32189,
14
+ "<e16>": 32188,
15
+ "<e17>": 32187,
16
+ "<e18>": 32186,
17
+ "<e19>": 32185,
18
+ "<e1>": 32203,
19
+ "<e20>": 32184,
20
+ "<e21>": 32183,
21
+ "<e22>": 32182,
22
+ "<e23>": 32181,
23
+ "<e24>": 32180,
24
+ "<e25>": 32179,
25
+ "<e26>": 32178,
26
+ "<e27>": 32177,
27
+ "<e28>": 32176,
28
+ "<e29>": 32175,
29
+ "<e2>": 32202,
30
+ "<e30>": 32174,
31
+ "<e31>": 32173,
32
+ "<e32>": 32172,
33
+ "<e33>": 32171,
34
+ "<e34>": 32170,
35
+ "<e35>": 32169,
36
+ "<e36>": 32168,
37
+ "<e37>": 32167,
38
+ "<e38>": 32166,
39
+ "<e39>": 32165,
40
+ "<e3>": 32201,
41
+ "<e40>": 32164,
42
+ "<e41>": 32163,
43
+ "<e42>": 32162,
44
+ "<e43>": 32161,
45
+ "<e44>": 32160,
46
+ "<e45>": 32159,
47
+ "<e46>": 32158,
48
+ "<e47>": 32157,
49
+ "<e48>": 32156,
50
+ "<e49>": 32155,
51
+ "<e4>": 32200,
52
+ "<e50>": 32154,
53
+ "<e51>": 32153,
54
+ "<e52>": 32152,
55
+ "<e53>": 32151,
56
+ "<e54>": 32150,
57
+ "<e55>": 32149,
58
+ "<e56>": 32148,
59
+ "<e57>": 32147,
60
+ "<e58>": 32146,
61
+ "<e59>": 32145,
62
+ "<e5>": 32199,
63
+ "<e60>": 32144,
64
+ "<e61>": 32143,
65
+ "<e62>": 32142,
66
+ "<e63>": 32141,
67
+ "<e64>": 32140,
68
+ "<e65>": 32139,
69
+ "<e66>": 32138,
70
+ "<e67>": 32137,
71
+ "<e68>": 32136,
72
+ "<e69>": 32135,
73
+ "<e6>": 32198,
74
+ "<e70>": 32134,
75
+ "<e71>": 32133,
76
+ "<e72>": 32132,
77
+ "<e73>": 32131,
78
+ "<e74>": 32130,
79
+ "<e75>": 32129,
80
+ "<e76>": 32128,
81
+ "<e77>": 32127,
82
+ "<e78>": 32126,
83
+ "<e79>": 32125,
84
+ "<e7>": 32197,
85
+ "<e80>": 32124,
86
+ "<e81>": 32123,
87
+ "<e82>": 32122,
88
+ "<e83>": 32121,
89
+ "<e84>": 32120,
90
+ "<e85>": 32119,
91
+ "<e86>": 32118,
92
+ "<e87>": 32117,
93
+ "<e88>": 32116,
94
+ "<e89>": 32115,
95
+ "<e8>": 32196,
96
+ "<e90>": 32114,
97
+ "<e91>": 32113,
98
+ "<e92>": 32112,
99
+ "<e93>": 32111,
100
+ "<e94>": 32110,
101
+ "<e95>": 32109,
102
+ "<e96>": 32108,
103
+ "<e97>": 32107,
104
+ "<e98>": 32106,
105
+ "<e99>": 32105,
106
+ "<e9>": 32195,
107
+ "<en>": 32206,
108
+ "<end>": 32104,
109
+ "<go>": 32212,
110
+ "<java>": 32208,
111
+ "<javascript>": 32209,
112
+ "<keep>": 32100,
113
+ "<msg>": 32205,
114
+ "<php>": 32211,
115
+ "<python>": 32207,
116
+ "<ruby>": 32210,
117
+ "<start>": 32103
118
+ }
config.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/codereviewer",
3
+ "add_token_id": 32101,
4
+ "architectures": [
5
+ "T5ForConditionalGeneration"
6
+ ],
7
+ "bos_token_id": 1,
8
+ "d_ff": 3072,
9
+ "d_kv": 64,
10
+ "d_model": 768,
11
+ "decoder_start_token_id": 0,
12
+ "del_token_id": 32102,
13
+ "dense_act_fn": "relu",
14
+ "dropout_rate": 0.1,
15
+ "end_token_id": 32104,
16
+ "eos_token_id": 2,
17
+ "feed_forward_proj": "relu",
18
+ "gradient_checkpointing": false,
19
+ "id2label": {
20
+ "0": "LABEL_0"
21
+ },
22
+ "initializer_factor": 1.0,
23
+ "is_encoder_decoder": true,
24
+ "is_gated_act": false,
25
+ "keep_token_id": 32100,
26
+ "label2id": {
27
+ "LABEL_0": 0
28
+ },
29
+ "lang_id": {
30
+ "<c>": 32213,
31
+ "<c_plus_plus>": 32215,
32
+ "<c_sharp>": 32214,
33
+ "<en>": 32206,
34
+ "<go>": 32212,
35
+ "<java>": 32208,
36
+ "<javascript>": 32209,
37
+ "<php>": 32211,
38
+ "<python>": 32207,
39
+ "<ruby>": 32210
40
+ },
41
+ "lang_tokens": [
42
+ "<en>",
43
+ "<python>",
44
+ "<java>",
45
+ "<javascript>",
46
+ "<ruby>",
47
+ "<php>",
48
+ "<go>",
49
+ "<c>",
50
+ "<c_sharp>",
51
+ "<c_plus_plus>"
52
+ ],
53
+ "layer_norm_epsilon": 1e-06,
54
+ "mask_token_id": 4,
55
+ "model_type": "t5",
56
+ "n_positions": 512,
57
+ "num_decoder_layers": 12,
58
+ "num_heads": 12,
59
+ "num_layers": 12,
60
+ "output_past": true,
61
+ "pad_token_id": 0,
62
+ "relative_attention_max_distance": 128,
63
+ "relative_attention_num_buckets": 32,
64
+ "start_token_id": 32103,
65
+ "task_specific_params": {
66
+ "summarization": {
67
+ "early_stopping": true,
68
+ "length_penalty": 2.0,
69
+ "max_length": 200,
70
+ "min_length": 30,
71
+ "no_repeat_ngram_size": 3,
72
+ "num_beams": 4,
73
+ "prefix": "summarize: "
74
+ },
75
+ "translation_en_to_de": {
76
+ "early_stopping": true,
77
+ "max_length": 300,
78
+ "num_beams": 4,
79
+ "prefix": "translate English to German: "
80
+ },
81
+ "translation_en_to_fr": {
82
+ "early_stopping": true,
83
+ "max_length": 300,
84
+ "num_beams": 4,
85
+ "prefix": "translate English to French: "
86
+ },
87
+ "translation_en_to_ro": {
88
+ "early_stopping": true,
89
+ "max_length": 300,
90
+ "num_beams": 4,
91
+ "prefix": "translate English to Romanian: "
92
+ }
93
+ },
94
+ "torch_dtype": "float32",
95
+ "transformers_version": "4.21.3",
96
+ "use_cache": true,
97
+ "vocab_size": 32216
98
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c37503607ff56790e49418536b1d9e59b08ecfd0f27aa5e037bfc48d4d8a2da1
3
+ size 891967487
special_tokens_map.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<add>",
4
+ "<del>",
5
+ "<keep>"
6
+ ],
7
+ "bos_token": {
8
+ "content": "<s>",
9
+ "lstrip": false,
10
+ "normalized": true,
11
+ "rstrip": false,
12
+ "single_word": false
13
+ },
14
+ "cls_token": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "eos_token": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": true,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ },
28
+ "mask_token": {
29
+ "content": "<mask>",
30
+ "lstrip": true,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false
34
+ },
35
+ "pad_token": {
36
+ "content": "<pad>",
37
+ "lstrip": false,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false
41
+ },
42
+ "sep_token": {
43
+ "content": "</s>",
44
+ "lstrip": false,
45
+ "normalized": true,
46
+ "rstrip": false,
47
+ "single_word": false
48
+ },
49
+ "unk_token": {
50
+ "content": "<unk>",
51
+ "lstrip": false,
52
+ "normalized": true,
53
+ "rstrip": false,
54
+ "single_word": false
55
+ }
56
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "cls_token": {
12
+ "__type": "AddedToken",
13
+ "content": "<s>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eos_token": {
20
+ "__type": "AddedToken",
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "errors": "replace",
28
+ "mask_token": {
29
+ "__type": "AddedToken",
30
+ "content": "<mask>",
31
+ "lstrip": true,
32
+ "normalized": true,
33
+ "rstrip": false,
34
+ "single_word": false
35
+ },
36
+ "model_max_length": 512,
37
+ "name_or_path": "microsoft/codereviewer",
38
+ "pad_token": {
39
+ "__type": "AddedToken",
40
+ "content": "<pad>",
41
+ "lstrip": false,
42
+ "normalized": true,
43
+ "rstrip": false,
44
+ "single_word": false
45
+ },
46
+ "sep_token": {
47
+ "__type": "AddedToken",
48
+ "content": "</s>",
49
+ "lstrip": false,
50
+ "normalized": true,
51
+ "rstrip": false,
52
+ "single_word": false
53
+ },
54
+ "special_tokens_map_file": "/home/shuailu/lushuai/code_review/PreViewer/pretrained_models/codet5/special_tokens_map.json",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": {
58
+ "__type": "AddedToken",
59
+ "content": "<unk>",
60
+ "lstrip": false,
61
+ "normalized": true,
62
+ "rstrip": false,
63
+ "single_word": false
64
+ }
65
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff