xlm-mlm-100-1280 / tokenizer_config.json
lysandre's picture
lysandre HF staff
Create tokenizer_config.json
d5f927b
raw
history blame
2.35 kB
{"do_lowercase_and_remove_accent": false, "id2lang": {"0": "af", "1": "als", "2": "am", "3": "an", "4": "ang", "5": "ar", "6": "arz", "7": "ast", "8": "az", "9": "bar", "10": "be", "11": "bg", "12": "bn", "13": "br", "14": "bs", "15": "ca", "16": "ceb", "17": "ckb", "18": "cs", "19": "cy", "20": "da", "21": "de", "22": "el", "23": "en", "24": "eo", "25": "es", "26": "et", "27": "eu", "28": "fa", "29": "fi", "30": "fr", "31": "fy", "32": "ga", "33": "gan", "34": "gl", "35": "gu", "36": "he", "37": "hi", "38": "hr", "39": "hu", "40": "hy", "41": "ia", "42": "id", "43": "is", "44": "it", "45": "ja", "46": "jv", "47": "ka", "48": "kk", "49": "kn", "50": "ko", "51": "ku", "52": "la", "53": "lb", "54": "lt", "55": "lv", "56": "mk", "57": "ml", "58": "mn", "59": "mr", "60": "ms", "61": "my", "62": "nds", "63": "ne", "64": "nl", "65": "nn", "66": "no", "67": "oc", "68": "pl", "69": "pt", "70": "ro", "71": "ru", "72": "scn", "73": "sco", "74": "sh", "75": "si", "76": "simple", "77": "sk", "78": "sl", "79": "sq", "80": "sr", "81": "sv", "82": "sw", "83": "ta", "84": "te", "85": "th", "86": "tl", "87": "tr", "88": "tt", "89": "uk", "90": "ur", "91": "uz", "92": "vi", "93": "war", "94": "wuu", "95": "yi", "96": "zh", "97": "zh_classical", "98": "zh_min_nan", "99": "zh_yue"}, "lang2id": {"af": 0, "als": 1, "am": 2, "an": 3, "ang": 4, "ar": 5, "arz": 6, "ast": 7, "az": 8, "bar": 9, "be": 10, "bg": 11, "bn": 12, "br": 13, "bs": 14, "ca": 15, "ceb": 16, "ckb": 17, "cs": 18, "cy": 19, "da": 20, "de": 21, "el": 22, "en": 23, "eo": 24, "es": 25, "et": 26, "eu": 27, "fa": 28, "fi": 29, "fr": 30, "fy": 31, "ga": 32, "gan": 33, "gl": 34, "gu": 35, "he": 36, "hi": 37, "hr": 38, "hu": 39, "hy": 40, "ia": 41, "id": 42, "is": 43, "it": 44, "ja": 45, "jv": 46, "ka": 47, "kk": 48, "kn": 49, "ko": 50, "ku": 51, "la": 52, "lb": 53, "lt": 54, "lv": 55, "mk": 56, "ml": 57, "mn": 58, "mr": 59, "ms": 60, "my": 61, "nds": 62, "ne": 63, "nl": 64, "nn": 65, "no": 66, "oc": 67, "pl": 68, "pt": 69, "ro": 70, "ru": 71, "scn": 72, "sco": 73, "sh": 74, "si": 75, "simple": 76, "sk": 77, "sl": 78, "sq": 79, "sr": 80, "sv": 81, "sw": 82, "ta": 83, "te": 84, "th": 85, "tl": 86, "tr": 87, "tt": 88, "uk": 89, "ur": 90, "uz": 91, "vi": 92, "war": 93, "wuu": 94, "yi": 95, "zh": 96, "zh_classical": 97, "zh_min_nan": 98, "zh_yue": 99}, "model_max_length": 512}