Remove special characters
- tokenizer.json +1 -1
- vocab.json +1 -1
tokenizer.json CHANGED
@@ -1 +1 @@
- PreTrainedTokenizer(name_or_path='', vocab_size=
+ PreTrainedTokenizer(name_or_path='', vocab_size=38, model_max_len=1000000000000000019884624838656, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '[UNK]', 'pad_token': '[PAD]'})
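The new repr describes a slow, character-level tokenizer with a 38-entry vocabulary. A minimal sketch of loading it, assuming a Wav2Vec2CTCTokenizer (the vocab layout in the next file suggests one, but the diff only shows a generic PreTrainedTokenizer repr):

```python
# Minimal sketch: loading the vocabulary changed in this commit.
# Wav2Vec2CTCTokenizer is an assumption based on the vocab layout
# ("|" at index 0 as a word delimiter, plus [UNK]/[PAD]); the diff
# itself only shows a generic PreTrainedTokenizer repr.
from transformers import Wav2Vec2CTCTokenizer

tokenizer = Wav2Vec2CTCTokenizer(
    "vocab.json",            # the file changed in this commit
    unk_token="[UNK]",       # matches the special_tokens in the repr above
    pad_token="[PAD]",
    word_delimiter_token="|",
)
print(tokenizer.vocab_size)  # 38, matching the new repr
```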
vocab.json CHANGED
@@ -1 +1 @@
- {"
+ {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "g": 7, "h": 8, "i": 9, "j": 10, "k": 11, "l": 12, "m": 13, "n": 14, "o": 15, "p": 16, "q": 17, "r": 18, "s": 19, "t": 20, "u": 21, "v": 22, "w": 23, "x": 24, "y": 25, "z": 26, "\u00e0": 27, "\u00e1": 28, "\u00e4": 29, "\u00e5": 30, "\u00f1": 31, "\u00f6": 32, "\u00fa": 33, "\u00fc": 34, "\u0161": 35, "|": 0, "[UNK]": 36, "[PAD]": 37}
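Under the same tokenizer assumption, encoding is character-by-character: spaces map to the "|" delimiter and each letter maps to its id from this file:

```python
# Minimal sketch (same tokenizer assumption as above): spaces become the
# "|" word-delimiter token (id 0); every other character maps to its
# vocab.json id, e.g. h=8, o=15, l=12, a=1, m=13, u=21, n=14, d=4.
print(tokenizer("hola mundo").input_ids)
# [8, 15, 12, 1, 0, 13, 21, 14, 4, 15]
```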