add tokenizer
Browse files- special_tokens_map.json +1 -1
- tokenizer.json +0 -81
special_tokens_map.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>"
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>"}
|
tokenizer.json
CHANGED
@@ -2063,87 +2063,6 @@
|
|
2063 |
"rstrip": false,
|
2064 |
"normalized": false,
|
2065 |
"special": true
|
2066 |
-
},
|
2067 |
-
{
|
2068 |
-
"id": 30000,
|
2069 |
-
"content": "<P01>",
|
2070 |
-
"single_word": false,
|
2071 |
-
"lstrip": false,
|
2072 |
-
"rstrip": false,
|
2073 |
-
"normalized": false,
|
2074 |
-
"special": true
|
2075 |
-
},
|
2076 |
-
{
|
2077 |
-
"id": 30001,
|
2078 |
-
"content": "<P02>",
|
2079 |
-
"single_word": false,
|
2080 |
-
"lstrip": false,
|
2081 |
-
"rstrip": false,
|
2082 |
-
"normalized": false,
|
2083 |
-
"special": true
|
2084 |
-
},
|
2085 |
-
{
|
2086 |
-
"id": 30002,
|
2087 |
-
"content": "<P03>",
|
2088 |
-
"single_word": false,
|
2089 |
-
"lstrip": false,
|
2090 |
-
"rstrip": false,
|
2091 |
-
"normalized": false,
|
2092 |
-
"special": true
|
2093 |
-
},
|
2094 |
-
{
|
2095 |
-
"id": 30003,
|
2096 |
-
"content": "<P04>",
|
2097 |
-
"single_word": false,
|
2098 |
-
"lstrip": false,
|
2099 |
-
"rstrip": false,
|
2100 |
-
"normalized": false,
|
2101 |
-
"special": true
|
2102 |
-
},
|
2103 |
-
{
|
2104 |
-
"id": 30004,
|
2105 |
-
"content": "<P05>",
|
2106 |
-
"single_word": false,
|
2107 |
-
"lstrip": false,
|
2108 |
-
"rstrip": false,
|
2109 |
-
"normalized": false,
|
2110 |
-
"special": true
|
2111 |
-
},
|
2112 |
-
{
|
2113 |
-
"id": 30005,
|
2114 |
-
"content": "<P06>",
|
2115 |
-
"single_word": false,
|
2116 |
-
"lstrip": false,
|
2117 |
-
"rstrip": false,
|
2118 |
-
"normalized": false,
|
2119 |
-
"special": true
|
2120 |
-
},
|
2121 |
-
{
|
2122 |
-
"id": 30006,
|
2123 |
-
"content": "<P07>",
|
2124 |
-
"single_word": false,
|
2125 |
-
"lstrip": false,
|
2126 |
-
"rstrip": false,
|
2127 |
-
"normalized": false,
|
2128 |
-
"special": true
|
2129 |
-
},
|
2130 |
-
{
|
2131 |
-
"id": 30007,
|
2132 |
-
"content": "<P08>",
|
2133 |
-
"single_word": false,
|
2134 |
-
"lstrip": false,
|
2135 |
-
"rstrip": false,
|
2136 |
-
"normalized": false,
|
2137 |
-
"special": true
|
2138 |
-
},
|
2139 |
-
{
|
2140 |
-
"id": 30008,
|
2141 |
-
"content": "<P09>",
|
2142 |
-
"single_word": false,
|
2143 |
-
"lstrip": false,
|
2144 |
-
"rstrip": false,
|
2145 |
-
"normalized": false,
|
2146 |
-
"special": true
|
2147 |
}
|
2148 |
],
|
2149 |
"normalizer": {
|
|
|
2063 |
"rstrip": false,
|
2064 |
"normalized": false,
|
2065 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2066 |
}
|
2067 |
],
|
2068 |
"normalizer": {
|