alimboff committed on
Commit
33c0a83
1 Parent(s): 7285833

Upload folder using huggingface_hub

Browse files
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "__kbd__": 128104
3
+ }
config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "models/m2m_v2/m2m_kbd_one",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "relu",
5
+ "architectures": [
6
+ "M2M100ForConditionalGeneration"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 0,
10
+ "d_model": 1024,
11
+ "decoder_attention_heads": 16,
12
+ "decoder_ffn_dim": 4096,
13
+ "decoder_layerdrop": 0.05,
14
+ "decoder_layers": 12,
15
+ "decoder_start_token_id": 2,
16
+ "dropout": 0.1,
17
+ "early_stopping": true,
18
+ "encoder_attention_heads": 16,
19
+ "encoder_ffn_dim": 4096,
20
+ "encoder_layerdrop": 0.05,
21
+ "encoder_layers": 12,
22
+ "eos_token_id": 2,
23
+ "gradient_checkpointing": false,
24
+ "init_std": 0.02,
25
+ "is_encoder_decoder": true,
26
+ "max_length": 200,
27
+ "max_position_embeddings": 1024,
28
+ "model_type": "m2m_100",
29
+ "num_beams": 5,
30
+ "num_hidden_layers": 12,
31
+ "pad_token_id": 1,
32
+ "scale_embedding": true,
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.33.0",
35
+ "use_cache": true,
36
+ "vocab_size": 128106
37
+ }
generation_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "decoder_start_token_id": 2,
5
+ "early_stopping": true,
6
+ "eos_token_id": 2,
7
+ "max_length": 200,
8
+ "num_beams": 5,
9
+ "pad_token_id": 1,
10
+ "transformers_version": "4.33.0"
11
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:280c2f86bb460a2b98d7064ca16e278d4d7617c37755b7ba9f2b136d0a66757c
3
+ size 1935771582
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8f7c76ed2a5e0822be39f0a4f95a55eb19c78f4593ce609e2edbc2aea4d380a
3
+ size 2423393
special_tokens_map.json ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "__kbd__",
4
+ "__af__",
5
+ "__am__",
6
+ "__ar__",
7
+ "__ast__",
8
+ "__az__",
9
+ "__ba__",
10
+ "__be__",
11
+ "__bg__",
12
+ "__bn__",
13
+ "__br__",
14
+ "__bs__",
15
+ "__ca__",
16
+ "__ceb__",
17
+ "__cs__",
18
+ "__cy__",
19
+ "__da__",
20
+ "__de__",
21
+ "__el__",
22
+ "__en__",
23
+ "__es__",
24
+ "__et__",
25
+ "__fa__",
26
+ "__ff__",
27
+ "__fi__",
28
+ "__fr__",
29
+ "__fy__",
30
+ "__ga__",
31
+ "__gd__",
32
+ "__gl__",
33
+ "__gu__",
34
+ "__ha__",
35
+ "__he__",
36
+ "__hi__",
37
+ "__hr__",
38
+ "__ht__",
39
+ "__hu__",
40
+ "__hy__",
41
+ "__id__",
42
+ "__ig__",
43
+ "__ilo__",
44
+ "__is__",
45
+ "__it__",
46
+ "__ja__",
47
+ "__jv__",
48
+ "__ka__",
49
+ "__kk__",
50
+ "__km__",
51
+ "__kn__",
52
+ "__ko__",
53
+ "__lb__",
54
+ "__lg__",
55
+ "__ln__",
56
+ "__lo__",
57
+ "__lt__",
58
+ "__lv__",
59
+ "__mg__",
60
+ "__mk__",
61
+ "__ml__",
62
+ "__mn__",
63
+ "__mr__",
64
+ "__ms__",
65
+ "__my__",
66
+ "__ne__",
67
+ "__nl__",
68
+ "__no__",
69
+ "__ns__",
70
+ "__oc__",
71
+ "__or__",
72
+ "__pa__",
73
+ "__pl__",
74
+ "__ps__",
75
+ "__pt__",
76
+ "__ro__",
77
+ "__ru__",
78
+ "__sd__",
79
+ "__si__",
80
+ "__sk__",
81
+ "__sl__",
82
+ "__so__",
83
+ "__sq__",
84
+ "__sr__",
85
+ "__ss__",
86
+ "__su__",
87
+ "__sv__",
88
+ "__sw__",
89
+ "__ta__",
90
+ "__th__",
91
+ "__tl__",
92
+ "__tn__",
93
+ "__tr__",
94
+ "__uk__",
95
+ "__ur__",
96
+ "__uz__",
97
+ "__vi__",
98
+ "__wo__",
99
+ "__xh__",
100
+ "__yi__",
101
+ "__yo__",
102
+ "__zh__",
103
+ "__zu__"
104
+ ],
105
+ "bos_token": "<s>",
106
+ "eos_token": "</s>",
107
+ "pad_token": "<pad>",
108
+ "sep_token": "</s>",
109
+ "unk_token": "<unk>"
110
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "__kbd__",
4
+ "__af__",
5
+ "__am__",
6
+ "__ar__",
7
+ "__ast__",
8
+ "__az__",
9
+ "__ba__",
10
+ "__be__",
11
+ "__bg__",
12
+ "__bn__",
13
+ "__br__",
14
+ "__bs__",
15
+ "__ca__",
16
+ "__ceb__",
17
+ "__cs__",
18
+ "__cy__",
19
+ "__da__",
20
+ "__de__",
21
+ "__el__",
22
+ "__en__",
23
+ "__es__",
24
+ "__et__",
25
+ "__fa__",
26
+ "__ff__",
27
+ "__fi__",
28
+ "__fr__",
29
+ "__fy__",
30
+ "__ga__",
31
+ "__gd__",
32
+ "__gl__",
33
+ "__gu__",
34
+ "__ha__",
35
+ "__he__",
36
+ "__hi__",
37
+ "__hr__",
38
+ "__ht__",
39
+ "__hu__",
40
+ "__hy__",
41
+ "__id__",
42
+ "__ig__",
43
+ "__ilo__",
44
+ "__is__",
45
+ "__it__",
46
+ "__ja__",
47
+ "__jv__",
48
+ "__ka__",
49
+ "__kk__",
50
+ "__km__",
51
+ "__kn__",
52
+ "__ko__",
53
+ "__lb__",
54
+ "__lg__",
55
+ "__ln__",
56
+ "__lo__",
57
+ "__lt__",
58
+ "__lv__",
59
+ "__mg__",
60
+ "__mk__",
61
+ "__ml__",
62
+ "__mn__",
63
+ "__mr__",
64
+ "__ms__",
65
+ "__my__",
66
+ "__ne__",
67
+ "__nl__",
68
+ "__no__",
69
+ "__ns__",
70
+ "__oc__",
71
+ "__or__",
72
+ "__pa__",
73
+ "__pl__",
74
+ "__ps__",
75
+ "__pt__",
76
+ "__ro__",
77
+ "__ru__",
78
+ "__sd__",
79
+ "__si__",
80
+ "__sk__",
81
+ "__sl__",
82
+ "__so__",
83
+ "__sq__",
84
+ "__sr__",
85
+ "__ss__",
86
+ "__su__",
87
+ "__sv__",
88
+ "__sw__",
89
+ "__ta__",
90
+ "__th__",
91
+ "__tl__",
92
+ "__tn__",
93
+ "__tr__",
94
+ "__uk__",
95
+ "__ur__",
96
+ "__uz__",
97
+ "__vi__",
98
+ "__wo__",
99
+ "__xh__",
100
+ "__yi__",
101
+ "__yo__",
102
+ "__zh__",
103
+ "__zu__"
104
+ ],
105
+ "bos_token": "<s>",
106
+ "clean_up_tokenization_spaces": true,
107
+ "eos_token": "</s>",
108
+ "language_codes": "m2m100",
109
+ "model_max_length": 1024,
110
+ "num_madeup_words": 8,
111
+ "pad_token": "<pad>",
112
+ "sep_token": "</s>",
113
+ "sp_model_kwargs": {},
114
+ "src_lang": null,
115
+ "tgt_lang": null,
116
+ "tokenizer_class": "M2M100Tokenizer",
117
+ "tokenizer_file": null,
118
+ "unk_token": "<unk>"
119
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff