Add model and tokenizer files
Browse files- added_tokens.json +118 -0
- config.json +98 -0
- merges.txt +0 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +56 -0
- tokenizer.json +0 -0
- tokenizer_config.json +65 -0
- vocab.json +0 -0
added_tokens.json
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<add>": 32101,
|
3 |
+
"<c>": 32213,
|
4 |
+
"<c_plus_plus>": 32215,
|
5 |
+
"<c_sharp>": 32214,
|
6 |
+
"<del>": 32102,
|
7 |
+
"<e0>": 32204,
|
8 |
+
"<e10>": 32194,
|
9 |
+
"<e11>": 32193,
|
10 |
+
"<e12>": 32192,
|
11 |
+
"<e13>": 32191,
|
12 |
+
"<e14>": 32190,
|
13 |
+
"<e15>": 32189,
|
14 |
+
"<e16>": 32188,
|
15 |
+
"<e17>": 32187,
|
16 |
+
"<e18>": 32186,
|
17 |
+
"<e19>": 32185,
|
18 |
+
"<e1>": 32203,
|
19 |
+
"<e20>": 32184,
|
20 |
+
"<e21>": 32183,
|
21 |
+
"<e22>": 32182,
|
22 |
+
"<e23>": 32181,
|
23 |
+
"<e24>": 32180,
|
24 |
+
"<e25>": 32179,
|
25 |
+
"<e26>": 32178,
|
26 |
+
"<e27>": 32177,
|
27 |
+
"<e28>": 32176,
|
28 |
+
"<e29>": 32175,
|
29 |
+
"<e2>": 32202,
|
30 |
+
"<e30>": 32174,
|
31 |
+
"<e31>": 32173,
|
32 |
+
"<e32>": 32172,
|
33 |
+
"<e33>": 32171,
|
34 |
+
"<e34>": 32170,
|
35 |
+
"<e35>": 32169,
|
36 |
+
"<e36>": 32168,
|
37 |
+
"<e37>": 32167,
|
38 |
+
"<e38>": 32166,
|
39 |
+
"<e39>": 32165,
|
40 |
+
"<e3>": 32201,
|
41 |
+
"<e40>": 32164,
|
42 |
+
"<e41>": 32163,
|
43 |
+
"<e42>": 32162,
|
44 |
+
"<e43>": 32161,
|
45 |
+
"<e44>": 32160,
|
46 |
+
"<e45>": 32159,
|
47 |
+
"<e46>": 32158,
|
48 |
+
"<e47>": 32157,
|
49 |
+
"<e48>": 32156,
|
50 |
+
"<e49>": 32155,
|
51 |
+
"<e4>": 32200,
|
52 |
+
"<e50>": 32154,
|
53 |
+
"<e51>": 32153,
|
54 |
+
"<e52>": 32152,
|
55 |
+
"<e53>": 32151,
|
56 |
+
"<e54>": 32150,
|
57 |
+
"<e55>": 32149,
|
58 |
+
"<e56>": 32148,
|
59 |
+
"<e57>": 32147,
|
60 |
+
"<e58>": 32146,
|
61 |
+
"<e59>": 32145,
|
62 |
+
"<e5>": 32199,
|
63 |
+
"<e60>": 32144,
|
64 |
+
"<e61>": 32143,
|
65 |
+
"<e62>": 32142,
|
66 |
+
"<e63>": 32141,
|
67 |
+
"<e64>": 32140,
|
68 |
+
"<e65>": 32139,
|
69 |
+
"<e66>": 32138,
|
70 |
+
"<e67>": 32137,
|
71 |
+
"<e68>": 32136,
|
72 |
+
"<e69>": 32135,
|
73 |
+
"<e6>": 32198,
|
74 |
+
"<e70>": 32134,
|
75 |
+
"<e71>": 32133,
|
76 |
+
"<e72>": 32132,
|
77 |
+
"<e73>": 32131,
|
78 |
+
"<e74>": 32130,
|
79 |
+
"<e75>": 32129,
|
80 |
+
"<e76>": 32128,
|
81 |
+
"<e77>": 32127,
|
82 |
+
"<e78>": 32126,
|
83 |
+
"<e79>": 32125,
|
84 |
+
"<e7>": 32197,
|
85 |
+
"<e80>": 32124,
|
86 |
+
"<e81>": 32123,
|
87 |
+
"<e82>": 32122,
|
88 |
+
"<e83>": 32121,
|
89 |
+
"<e84>": 32120,
|
90 |
+
"<e85>": 32119,
|
91 |
+
"<e86>": 32118,
|
92 |
+
"<e87>": 32117,
|
93 |
+
"<e88>": 32116,
|
94 |
+
"<e89>": 32115,
|
95 |
+
"<e8>": 32196,
|
96 |
+
"<e90>": 32114,
|
97 |
+
"<e91>": 32113,
|
98 |
+
"<e92>": 32112,
|
99 |
+
"<e93>": 32111,
|
100 |
+
"<e94>": 32110,
|
101 |
+
"<e95>": 32109,
|
102 |
+
"<e96>": 32108,
|
103 |
+
"<e97>": 32107,
|
104 |
+
"<e98>": 32106,
|
105 |
+
"<e99>": 32105,
|
106 |
+
"<e9>": 32195,
|
107 |
+
"<en>": 32206,
|
108 |
+
"<end>": 32104,
|
109 |
+
"<go>": 32212,
|
110 |
+
"<java>": 32208,
|
111 |
+
"<javascript>": 32209,
|
112 |
+
"<keep>": 32100,
|
113 |
+
"<msg>": 32205,
|
114 |
+
"<php>": 32211,
|
115 |
+
"<python>": 32207,
|
116 |
+
"<ruby>": 32210,
|
117 |
+
"<start>": 32103
|
118 |
+
}
|
config.json
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "microsoft/codereviewer",
|
3 |
+
"add_token_id": 32101,
|
4 |
+
"architectures": [
|
5 |
+
"T5ForConditionalGeneration"
|
6 |
+
],
|
7 |
+
"bos_token_id": 1,
|
8 |
+
"d_ff": 3072,
|
9 |
+
"d_kv": 64,
|
10 |
+
"d_model": 768,
|
11 |
+
"decoder_start_token_id": 0,
|
12 |
+
"del_token_id": 32102,
|
13 |
+
"dense_act_fn": "relu",
|
14 |
+
"dropout_rate": 0.1,
|
15 |
+
"end_token_id": 32104,
|
16 |
+
"eos_token_id": 2,
|
17 |
+
"feed_forward_proj": "relu",
|
18 |
+
"gradient_checkpointing": false,
|
19 |
+
"id2label": {
|
20 |
+
"0": "LABEL_0"
|
21 |
+
},
|
22 |
+
"initializer_factor": 1.0,
|
23 |
+
"is_encoder_decoder": true,
|
24 |
+
"is_gated_act": false,
|
25 |
+
"keep_token_id": 32100,
|
26 |
+
"label2id": {
|
27 |
+
"LABEL_0": 0
|
28 |
+
},
|
29 |
+
"lang_id": {
|
30 |
+
"<c>": 32213,
|
31 |
+
"<c_plus_plus>": 32215,
|
32 |
+
"<c_sharp>": 32214,
|
33 |
+
"<en>": 32206,
|
34 |
+
"<go>": 32212,
|
35 |
+
"<java>": 32208,
|
36 |
+
"<javascript>": 32209,
|
37 |
+
"<php>": 32211,
|
38 |
+
"<python>": 32207,
|
39 |
+
"<ruby>": 32210
|
40 |
+
},
|
41 |
+
"lang_tokens": [
|
42 |
+
"<en>",
|
43 |
+
"<python>",
|
44 |
+
"<java>",
|
45 |
+
"<javascript>",
|
46 |
+
"<ruby>",
|
47 |
+
"<php>",
|
48 |
+
"<go>",
|
49 |
+
"<c>",
|
50 |
+
"<c_sharp>",
|
51 |
+
"<c_plus_plus>"
|
52 |
+
],
|
53 |
+
"layer_norm_epsilon": 1e-06,
|
54 |
+
"mask_token_id": 4,
|
55 |
+
"model_type": "t5",
|
56 |
+
"n_positions": 512,
|
57 |
+
"num_decoder_layers": 12,
|
58 |
+
"num_heads": 12,
|
59 |
+
"num_layers": 12,
|
60 |
+
"output_past": true,
|
61 |
+
"pad_token_id": 0,
|
62 |
+
"relative_attention_max_distance": 128,
|
63 |
+
"relative_attention_num_buckets": 32,
|
64 |
+
"start_token_id": 32103,
|
65 |
+
"task_specific_params": {
|
66 |
+
"summarization": {
|
67 |
+
"early_stopping": true,
|
68 |
+
"length_penalty": 2.0,
|
69 |
+
"max_length": 200,
|
70 |
+
"min_length": 30,
|
71 |
+
"no_repeat_ngram_size": 3,
|
72 |
+
"num_beams": 4,
|
73 |
+
"prefix": "summarize: "
|
74 |
+
},
|
75 |
+
"translation_en_to_de": {
|
76 |
+
"early_stopping": true,
|
77 |
+
"max_length": 300,
|
78 |
+
"num_beams": 4,
|
79 |
+
"prefix": "translate English to German: "
|
80 |
+
},
|
81 |
+
"translation_en_to_fr": {
|
82 |
+
"early_stopping": true,
|
83 |
+
"max_length": 300,
|
84 |
+
"num_beams": 4,
|
85 |
+
"prefix": "translate English to French: "
|
86 |
+
},
|
87 |
+
"translation_en_to_ro": {
|
88 |
+
"early_stopping": true,
|
89 |
+
"max_length": 300,
|
90 |
+
"num_beams": 4,
|
91 |
+
"prefix": "translate English to Romanian: "
|
92 |
+
}
|
93 |
+
},
|
94 |
+
"torch_dtype": "float32",
|
95 |
+
"transformers_version": "4.21.3",
|
96 |
+
"use_cache": true,
|
97 |
+
"vocab_size": 32216
|
98 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c37503607ff56790e49418536b1d9e59b08ecfd0f27aa5e037bfc48d4d8a2da1
|
3 |
+
size 891967487
|
special_tokens_map.json
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
"<add>",
|
4 |
+
"<del>",
|
5 |
+
"<keep>"
|
6 |
+
],
|
7 |
+
"bos_token": {
|
8 |
+
"content": "<s>",
|
9 |
+
"lstrip": false,
|
10 |
+
"normalized": true,
|
11 |
+
"rstrip": false,
|
12 |
+
"single_word": false
|
13 |
+
},
|
14 |
+
"cls_token": {
|
15 |
+
"content": "<s>",
|
16 |
+
"lstrip": false,
|
17 |
+
"normalized": true,
|
18 |
+
"rstrip": false,
|
19 |
+
"single_word": false
|
20 |
+
},
|
21 |
+
"eos_token": {
|
22 |
+
"content": "</s>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": true,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false
|
27 |
+
},
|
28 |
+
"mask_token": {
|
29 |
+
"content": "<mask>",
|
30 |
+
"lstrip": true,
|
31 |
+
"normalized": true,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false
|
34 |
+
},
|
35 |
+
"pad_token": {
|
36 |
+
"content": "<pad>",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": true,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false
|
41 |
+
},
|
42 |
+
"sep_token": {
|
43 |
+
"content": "</s>",
|
44 |
+
"lstrip": false,
|
45 |
+
"normalized": true,
|
46 |
+
"rstrip": false,
|
47 |
+
"single_word": false
|
48 |
+
},
|
49 |
+
"unk_token": {
|
50 |
+
"content": "<unk>",
|
51 |
+
"lstrip": false,
|
52 |
+
"normalized": true,
|
53 |
+
"rstrip": false,
|
54 |
+
"single_word": false
|
55 |
+
}
|
56 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"bos_token": {
|
4 |
+
"__type": "AddedToken",
|
5 |
+
"content": "<s>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": true,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false
|
10 |
+
},
|
11 |
+
"cls_token": {
|
12 |
+
"__type": "AddedToken",
|
13 |
+
"content": "<s>",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": true,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false
|
18 |
+
},
|
19 |
+
"eos_token": {
|
20 |
+
"__type": "AddedToken",
|
21 |
+
"content": "</s>",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": true,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false
|
26 |
+
},
|
27 |
+
"errors": "replace",
|
28 |
+
"mask_token": {
|
29 |
+
"__type": "AddedToken",
|
30 |
+
"content": "<mask>",
|
31 |
+
"lstrip": true,
|
32 |
+
"normalized": true,
|
33 |
+
"rstrip": false,
|
34 |
+
"single_word": false
|
35 |
+
},
|
36 |
+
"model_max_length": 512,
|
37 |
+
"name_or_path": "microsoft/codereviewer",
|
38 |
+
"pad_token": {
|
39 |
+
"__type": "AddedToken",
|
40 |
+
"content": "<pad>",
|
41 |
+
"lstrip": false,
|
42 |
+
"normalized": true,
|
43 |
+
"rstrip": false,
|
44 |
+
"single_word": false
|
45 |
+
},
|
46 |
+
"sep_token": {
|
47 |
+
"__type": "AddedToken",
|
48 |
+
"content": "</s>",
|
49 |
+
"lstrip": false,
|
50 |
+
"normalized": true,
|
51 |
+
"rstrip": false,
|
52 |
+
"single_word": false
|
53 |
+
},
|
54 |
+
"special_tokens_map_file": "/home/shuailu/lushuai/code_review/PreViewer/pretrained_models/codet5/special_tokens_map.json",
|
55 |
+
"tokenizer_class": "RobertaTokenizer",
|
56 |
+
"trim_offsets": true,
|
57 |
+
"unk_token": {
|
58 |
+
"__type": "AddedToken",
|
59 |
+
"content": "<unk>",
|
60 |
+
"lstrip": false,
|
61 |
+
"normalized": true,
|
62 |
+
"rstrip": false,
|
63 |
+
"single_word": false
|
64 |
+
}
|
65 |
+
}
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|