goldfish-models
committed on
Commit
•
5bbf2d3
1
Parent(s):
660dfba
Upload srn_latn_10mb tokenizer.
Browse files
- added_tokens.json +1 -0
- special_tokens_map.json +1 -0
- spiece.model +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"[XXXXX80]": 22439, "[XXXXX59]": 22418, "[XXXXX103]": 22462, "[XXXXX145]": 22504, "[XXXXX96]": 22455, "[XXXXX92]": 22451, "[XXXXX47]": 22406, "[XXXXX100]": 22459, "[XXXXX113]": 22472, "[XXXXX136]": 22495, "[XXXXX83]": 22442, "[XXXXX40]": 22399, "[XXXXX57]": 22416, "[XXXXX73]": 22432, "[XXXXX26]": 22385, "[XXXXX146]": 22505, "[XXXXX85]": 22444, "[XXXXX62]": 22421, "[XXXXX75]": 22434, "[XXXXX79]": 22438, "[XXXXX58]": 22417, "[XXXXX32]": 22391, "[XXXXX123]": 22482, "[XXXXX8]": 22367, "[XXXXX21]": 22380, "[XXXXX44]": 22403, "[XXXXX152]": 22511, "[XXXXX78]": 22437, "[XXXXX46]": 22405, "[XXXXX20]": 22379, "[XXXXX68]": 22427, "[XXXXX15]": 22374, "[XXXXX71]": 22430, "[XXXXX122]": 22481, "[XXXXX153]": 22512, "[XXXXX19]": 22378, "[XXXXX104]": 22463, "[XXXXX94]": 22453, "[XXXXX66]": 22425, "[XXXXX91]": 22450, "[XXXXX76]": 22435, "[XXXXX162]": 22521, "[XXXXX116]": 22475, "[XXXXX24]": 22383, "[XXXXX6]": 22365, "[XXXXX107]": 22466, "[XXXXX121]": 22480, "[XXXXX155]": 22514, "[XXXXX54]": 22413, "[XXXXX87]": 22446, "[XXXXX97]": 22456, "[XXXXX163]": 22522, "[XXXXX38]": 22397, "[XXXXX115]": 22474, "[XXXXX114]": 22473, "[XXXXX30]": 22389, "[XXXXX64]": 22423, "[XXXXX149]": 22508, "[XXXXX165]": 22524, "[XXXXX29]": 22388, "[XXXXX125]": 22484, "[XXXXX126]": 22485, "[XXXXX16]": 22375, "[XXXXX130]": 22489, "[XXXXX129]": 22488, "[XXXXX88]": 22447, "[XXXXX42]": 22401, "[XXXXX154]": 22513, "[XXXXX93]": 22452, "[XXXXX166]": 22525, "[XXXXX34]": 22393, "[XXXXX33]": 22392, "[XXXXX61]": 22420, "<pad>": 22357, "[XXXXX101]": 22460, "[XXXXX9]": 22368, "[XXXXX43]": 22402, "[XXXXX0]": 22359, "[CLS]": 22355, "[XXXXX157]": 22516, "[XXXXX82]": 22441, "[XXXXX95]": 22454, "[XXXXX161]": 22520, "[XXXXX35]": 22394, "[XXXXX89]": 22448, "[XXXXX164]": 22523, "[XXXXX4]": 22363, "[XXXXX139]": 22498, "[XXXXX17]": 22376, "[XXXXX14]": 22373, "[XXXXX102]": 22461, "[XXXXX11]": 22370, "[XXXXX69]": 22428, "[XXXXX10]": 22369, "[XXXXX111]": 22470, "[XXXXX37]": 22396, "[XXXXX167]": 22526, "[XXXXX28]": 22387, "[XXXXX159]": 
22518, "[XXXXX140]": 22499, "[XXXXX55]": 22414, "[XXXXX118]": 22477, "[XXXXX1]": 22360, "[SEP]": 22356, "[XXXXX128]": 22487, "[XXXXX31]": 22390, "[XXXXX132]": 22491, "[XXXXX109]": 22468, "[XXXXX135]": 22494, "[XXXXX51]": 22410, "[XXXXX70]": 22429, "[XXXXX7]": 22366, "[XXXXX81]": 22440, "[XXXXX133]": 22492, "[XXXXX99]": 22458, "[XXXXX106]": 22465, "[XXXXX52]": 22411, "[XXXXX50]": 22409, "[XXXXX160]": 22519, "[XXXXX143]": 22502, "[XXXXX90]": 22449, "[MASK]": 22358, "[XXXXX150]": 22509, "[XXXXX112]": 22471, "[XXXXX18]": 22377, "[XXXXX41]": 22400, "[XXXXX168]": 22527, "[XXXXX108]": 22467, "[XXXXX60]": 22419, "[XXXXX127]": 22486, "[XXXXX22]": 22381, "[XXXXX74]": 22433, "[XXXXX147]": 22506, "[XXXXX98]": 22457, "[XXXXX110]": 22469, "[XXXXX151]": 22510, "[XXXXX131]": 22490, "[XXXXX141]": 22500, "[XXXXX158]": 22517, "[XXXXX27]": 22386, "[XXXXX13]": 22372, "[XXXXX67]": 22426, "[XXXXX2]": 22361, "[XXXXX137]": 22496, "[XXXXX117]": 22476, "[XXXXX72]": 22431, "[XXXXX77]": 22436, "[XXXXX3]": 22362, "[XXXXX124]": 22483, "[XXXXX45]": 22404, "[XXXXX119]": 22478, "[XXXXX36]": 22395, "[XXXXX105]": 22464, "[XXXXX63]": 22422, "[XXXXX120]": 22479, "[XXXXX5]": 22364, "[XXXXX84]": 22443, "[XXXXX25]": 22384, "[XXXXX144]": 22503, "[XXXXX86]": 22445, "[XXXXX65]": 22424, "[XXXXX48]": 22407, "[XXXXX134]": 22493, "[XXXXX53]": 22412, "[XXXXX156]": 22515, "[XXXXX138]": 22497, "[XXXXX12]": 22371, "[XXXXX49]": 22408, "[XXXXX142]": 22501, "[XXXXX39]": 22398, "[XXXXX148]": 22507, "[XXXXX23]": 22382, "[XXXXX56]": 22415}
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["[XXXXX0]", "[XXXXX1]", "[XXXXX2]", "[XXXXX3]", "[XXXXX4]", "[XXXXX5]", "[XXXXX6]", "[XXXXX7]", "[XXXXX8]", "[XXXXX9]", "[XXXXX10]", "[XXXXX11]", "[XXXXX12]", "[XXXXX13]", "[XXXXX14]", "[XXXXX15]", "[XXXXX16]", "[XXXXX17]", "[XXXXX18]", "[XXXXX19]", "[XXXXX20]", "[XXXXX21]", "[XXXXX22]", "[XXXXX23]", "[XXXXX24]", "[XXXXX25]", "[XXXXX26]", "[XXXXX27]", "[XXXXX28]", "[XXXXX29]", "[XXXXX30]", "[XXXXX31]", "[XXXXX32]", "[XXXXX33]", "[XXXXX34]", "[XXXXX35]", "[XXXXX36]", "[XXXXX37]", "[XXXXX38]", "[XXXXX39]", "[XXXXX40]", "[XXXXX41]", "[XXXXX42]", "[XXXXX43]", "[XXXXX44]", "[XXXXX45]", "[XXXXX46]", "[XXXXX47]", "[XXXXX48]", "[XXXXX49]", "[XXXXX50]", "[XXXXX51]", "[XXXXX52]", "[XXXXX53]", "[XXXXX54]", "[XXXXX55]", "[XXXXX56]", "[XXXXX57]", "[XXXXX58]", "[XXXXX59]", "[XXXXX60]", "[XXXXX61]", "[XXXXX62]", "[XXXXX63]", "[XXXXX64]", "[XXXXX65]", "[XXXXX66]", "[XXXXX67]", "[XXXXX68]", "[XXXXX69]", "[XXXXX70]", "[XXXXX71]", "[XXXXX72]", "[XXXXX73]", "[XXXXX74]", "[XXXXX75]", "[XXXXX76]", "[XXXXX77]", "[XXXXX78]", "[XXXXX79]", "[XXXXX80]", "[XXXXX81]", "[XXXXX82]", "[XXXXX83]", "[XXXXX84]", "[XXXXX85]", "[XXXXX86]", "[XXXXX87]", "[XXXXX88]", "[XXXXX89]", "[XXXXX90]", "[XXXXX91]", "[XXXXX92]", "[XXXXX93]", "[XXXXX94]", "[XXXXX95]", "[XXXXX96]", "[XXXXX97]", "[XXXXX98]", "[XXXXX99]", "[XXXXX100]", "[XXXXX101]", "[XXXXX102]", "[XXXXX103]", "[XXXXX104]", "[XXXXX105]", "[XXXXX106]", "[XXXXX107]", "[XXXXX108]", "[XXXXX109]", "[XXXXX110]", "[XXXXX111]", "[XXXXX112]", "[XXXXX113]", "[XXXXX114]", "[XXXXX115]", "[XXXXX116]", "[XXXXX117]", "[XXXXX118]", "[XXXXX119]", "[XXXXX120]", "[XXXXX121]", "[XXXXX122]", "[XXXXX123]", "[XXXXX124]", "[XXXXX125]", "[XXXXX126]", "[XXXXX127]", "[XXXXX128]", "[XXXXX129]", "[XXXXX130]", 
"[XXXXX131]", "[XXXXX132]", "[XXXXX133]", "[XXXXX134]", "[XXXXX135]", "[XXXXX136]", "[XXXXX137]", "[XXXXX138]", "[XXXXX139]", "[XXXXX140]", "[XXXXX141]", "[XXXXX142]", "[XXXXX143]", "[XXXXX144]", "[XXXXX145]", "[XXXXX146]", "[XXXXX147]", "[XXXXX148]", "[XXXXX149]", "[XXXXX150]", "[XXXXX151]", "[XXXXX152]", "[XXXXX153]", "[XXXXX154]", "[XXXXX155]", "[XXXXX156]", "[XXXXX157]", "[XXXXX158]", "[XXXXX159]", "[XXXXX160]", "[XXXXX161]", "[XXXXX162]", "[XXXXX163]", "[XXXXX164]", "[XXXXX165]", "[XXXXX166]", "[XXXXX167]", "[XXXXX168]"]}
|
spiece.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:276a334ab02cad6badd89f904a4e9410970825f3603225958210b7d58169711b
|
3 |
+
size 591487
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"do_lower_case": false, "remove_space": true, "keep_accents": true, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "__type": "AddedToken"}, "sp_model_kwargs": {}, "name_or_path": "models/10mb/srn_latn_10mb", "model_input_names": ["input_ids", "attention_mask"], "special_tokens_map_file": "models/10mb/srn_latn_10mb/special_tokens_map.json", "tokenizer_class": "AlbertTokenizer"}
|