{ "add_bos_token": false, "add_prefix_space": false, "added_tokens_decoder": { "2": { "content": "[CLS]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "3": { "content": "[SEP]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "4": { "content": "[MASK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50106": { "content": "<|endoftext|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": true }, "50107": { "content": "<|¡|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50108": { "content": "<|¢|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50109": { "content": "<|£|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50110": { "content": "<|¤|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50111": { "content": "<|¥|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50112": { "content": "<|¦|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50113": { "content": "<|§|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50114": { "content": "<|¨|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50115": { "content": "<|©|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50116": { "content": "<|ª|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50117": { "content": "<|«|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50118": { "content": "<|¬|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50119": { "content": "<|®|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50120": { "content": "<|¯|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50121": { "content": "<|°|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50122": { "content": "<|±|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50123": { "content": "<|²|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50124": { "content": "<|³|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50125": { "content": "<|´|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50126": { "content": "<|µ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50127": { "content": "<|¶|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50128": { "content": "<|·|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50129": { "content": "<|¸|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50130": { "content": "<|¹|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50131": { "content": "<|º|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50132": { "content": "<|»|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50133": { "content": "<|¼|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50134": { "content": "<|½|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50135": { "content": "<|¾|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50136": { "content": "<|¿|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50137": { "content": "<|À|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50138": { "content": "<|Á|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50139": { "content": "<|Â|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50140": { "content": "<|Ã|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50141": { "content": "<|Ä|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50142": { "content": "<|Å|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50143": { "content": "<|Æ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50144": { "content": "<|Ç|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50145": { "content": "<|È|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50146": { "content": "<|É|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50147": { "content": "<|Ê|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50148": { "content": "<|Ë|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50149": { "content": "<|Ì|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50150": { "content": "<|Í|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50151": { "content": "<|Î|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50152": { "content": "<|Ï|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50153": { "content": "<|Ð|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50154": { "content": "<|Ñ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50155": { "content": "<|Ò|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50156": { "content": "<|Ó|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50157": { "content": "<|Ô|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50158": { "content": "<|Õ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50159": { "content": "<|Ö|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50160": { "content": "<|×|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50161": { "content": "<|Ø|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50162": { "content": "<|Ù|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50163": { "content": "<|Ú|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50164": { "content": "<|Û|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50165": { "content": "<|Ü|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50166": { "content": "<|Ý|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50167": { "content": "<|Þ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50168": { "content": "<|ß|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50169": { "content": "<|à|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50170": { "content": "<|á|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50171": { "content": "<|â|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50172": { "content": "<|ã|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50173": { "content": "<|ä|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50174": { "content": "<|å|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50175": { "content": "<|æ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50176": { "content": "<|ç|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50177": { "content": "<|è|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50178": { "content": "<|é|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50179": { "content": "<|ê|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50180": { "content": "<|ë|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50181": { "content": "<|ì|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50182": { "content": "<|í|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50183": { "content": "<|î|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50184": { "content": "<|ï|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50185": { "content": "<|ð|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50186": { "content": "<|ñ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50187": { "content": "<|ò|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50188": { "content": "<|ó|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50189": { "content": "<|ô|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50190": { "content": "<|õ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50191": { "content": "<|ö|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50192": { "content": "<|÷|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50193": { "content": "<|ø|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50194": { "content": "<|ù|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50195": { "content": "<|ú|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50196": { "content": "<|û|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50197": { "content": "<|ü|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50198": { "content": "<|ý|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50199": { "content": "<|þ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50200": { "content": "<|ÿ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50201": { "content": "<|Ā|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50202": { "content": "<|ā|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50203": { "content": "<|Ă|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50204": { "content": "<|ă|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50205": { "content": "<|Ą|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50206": { "content": "<|ą|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50207": { "content": "<|Ć|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50208": { "content": "<|ć|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50209": { "content": "<|Ĉ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50210": { "content": "<|ĉ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50211": { "content": "<|Ċ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50212": { "content": "<|ċ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50213": { "content": "<|Č|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50214": { "content": "<|č|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50215": { "content": "<|Ď|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50216": { "content": "<|ď|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50217": { "content": "<|Đ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50218": { "content": "<|đ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50219": { "content": "<|Ē|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50220": { "content": "<|ē|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50221": { "content": "<|Ĕ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50222": { "content": "<|ĕ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50223": { "content": "<|Ė|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50224": { "content": "<|ė|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50225": { "content": "<|Ę|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50226": { "content": "<|ę|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50227": { "content": "<|Ě|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50228": { "content": "<|ě|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50229": { "content": "<|Ĝ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50230": { "content": "<|ĝ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50231": { "content": "<|Ğ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50232": { "content": "<|ğ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50233": { "content": "<|Ġ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50234": { "content": "<|ġ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50235": { "content": "<|Ģ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50236": { "content": "<|ģ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50237": { "content": "<|Ĥ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50238": { "content": "<|ĥ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50239": { "content": "<|Ħ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50240": { "content": "<|ħ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50241": { "content": "<|Ĩ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50242": { "content": "<|ĩ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50243": { "content": "<|Ī|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50244": { "content": "<|ī|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50245": { "content": "<|Ĭ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50246": { "content": "<|ĭ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50247": { "content": "<|Į|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50248": { "content": "<|į|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50249": { "content": "<|İ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50250": { "content": "<|ı|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50251": { "content": "<|IJ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50252": { "content": "<|ij|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50253": { "content": "<|Ĵ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50254": { "content": "<|ĵ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50255": { "content": "<|Ķ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50256": { "content": "<|ķ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50257": { "content": "<|ĸ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50258": { "content": "<|Ĺ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50259": { "content": "<|ĺ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50260": { "content": "<|Ļ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50261": { "content": "<|ļ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50262": { "content": "<|Ľ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50263": { "content": "<|ľ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50264": { "content": "<|Ŀ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50265": { "content": "<|ŀ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50266": { "content": "<|Ł|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50267": { "content": "<|ł|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50268": { "content": "<|Ń|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50269": { "content": "<|Ûķ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50270": { "content": "<|ÛĮ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50271": { "content": "<|ا|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50272": { "content": "<|ĠØ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50273": { "content": "<|ÙĪ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50274": { "content": "<|ÙĨ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50275": { "content": "<|ر|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50276": { "content": "<|Ú©|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50277": { "content": "<|ĠÙ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50278": { "content": "<|ت|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50279": { "content": "<|اÙĨ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50280": { "content": "<|Ûİ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50281": { "content": "<|د|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50282": { "content": "<|Ùħ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50283": { "content": "<|Ġب|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50284": { "content": "<|ÛĨ|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50285": { "content": "<|س|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50286": { "content": "<|hu|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50287": { "content": "<|ta|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50288": { "content": "<|no|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50289": { "content": "<|th|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50290": { "content": "<|ur|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50291": { "content": "<|hr|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50292": { "content": "<|bg|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50293": { "content": "<|lt|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50294": { "content": "<|la|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50295": { "content": "<|mi|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50296": { "content": "<|ml|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50297": { "content": "<|cy|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50298": { "content": "<|sk|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50299": { "content": "<|te|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50300": { "content": "<|fa|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50301": { "content": "<|lv|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50302": { "content": "<|bn|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50303": { "content": "<|sr|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50304": { "content": "<|az|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50305": { "content": "<|sl|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50306": { "content": "<|kn|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50307": { "content": "<|et|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50308": { "content": "<|mk|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50309": { "content": "<|br|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50310": { "content": "<|eu|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50311": { "content": "<|is|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50312": { "content": "<|hy|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50313": { "content": "<|ne|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50314": { "content": "<|mn|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50315": { "content": "<|bs|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50316": { "content": "<|kk|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50317": { "content": "<|sq|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50318": { "content": "<|sw|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50319": { "content": "<|gl|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50320": { "content": "<|mr|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50321": { "content": "<|pa|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50322": { "content": "<|si|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50323": { "content": "<|km|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50324": { "content": "<|sn|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50325": { "content": "<|yo|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50326": { "content": "<|so|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50327": { "content": "<|af|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50328": { "content": "<|oc|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50329": { "content": "<|ka|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50330": { "content": "<|be|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50331": { "content": "<|tg|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50332": { "content": "<|sd|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50333": { "content": "<|gu|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50334": { "content": "<|am|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50335": { "content": "<|yi|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50336": { "content": "<|lo|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50337": { "content": "<|uz|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50338": { "content": "<|fo|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50339": { "content": "<|ht|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50340": { "content": "<|ps|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50341": { "content": "<|tk|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50342": { "content": "<|nn|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50343": { "content": "<|mt|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50344": { "content": "<|sa|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50345": { "content": "<|lb|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50346": { "content": "<|my|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50347": { "content": "<|bo|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50348": { "content": "<|tl|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50349": { "content": "<|mg|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50350": { "content": "<|as|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50351": { "content": "<|tt|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50352": { "content": "<|haw|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50353": { "content": "<|ln|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50354": { "content": "<|ha|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50355": { "content": "<|ba|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50356": { "content": "<|jw|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50357": { "content": "<|su|>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "50358": { "content": "<|translate|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50359": { "content": "<|transcribe|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50360": { "content": "<|startoflm|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50361": { "content": "<|startofprev|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50362": { "content": "<|nocaptions|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "50363": { "content": "<|notimestamps|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true } }, "additional_special_tokens": [ "<|endoftext|>", "<|startoftranscript|>", "<|¡|>", "<|¢|>", "<|£|>", "<|¤|>", "<|¥|>", "<|¦|>", "<|§|>", "<|¨|>", "<|©|>", "<|ª|>", "<|«|>", "<|¬|>", "<|®|>", "<|¯|>", "<|°|>", "<|±|>", "<|²|>", "<|³|>", "<|´|>", "<|µ|>", "<|¶|>", "<|·|>", "<|¸|>", "<|¹|>", "<|º|>", "<|»|>", "<|¼|>", "<|½|>", "<|¾|>", "<|¿|>", "<|À|>", "<|Á|>", "<|Â|>", "<|Ã|>", "<|Ä|>", "<|Å|>", "<|Æ|>", "<|Ç|>", "<|È|>", "<|É|>", "<|Ê|>", "<|Ë|>", "<|Ì|>", "<|Í|>", "<|Î|>", "<|Ï|>", "<|Ð|>", "<|Ñ|>", "<|Ò|>", "<|Ó|>", "<|Ô|>", "<|Õ|>", "<|Ö|>", "<|×|>", "<|Ø|>", "<|Ù|>", "<|Ú|>", "<|Û|>", "<|Ü|>", "<|Ý|>", "<|Þ|>", "<|ß|>", "<|à|>", "<|á|>", "<|â|>", "<|ã|>", "<|ä|>", "<|å|>", "<|æ|>", "<|ç|>", "<|è|>", "<|é|>", "<|ê|>", "<|ë|>", "<|ì|>", "<|í|>", "<|î|>", "<|ï|>", "<|ð|>", "<|ñ|>", "<|ò|>", "<|ó|>", "<|ô|>", "<|õ|>", "<|ö|>", "<|÷|>", "<|ø|>", "<|ù|>", "<|ú|>", "<|û|>", "<|ü|>", "<|ý|>", "<|þ|>", "<|ÿ|>", "<|Ā|>", "<|ā|>", "<|Ă|>", "<|ă|>", "<|Ą|>", "<|ą|>", "<|Ć|>", "<|ć|>", "<|Ĉ|>", "<|ĉ|>", "<|Ċ|>", "<|ċ|>", "<|Č|>", "<|č|>", "<|Ď|>", "<|ď|>", "<|Đ|>", "<|đ|>", "<|Ē|>", "<|ē|>", "<|Ĕ|>", "<|ĕ|>", "<|Ė|>", "<|ė|>", "<|Ę|>", "<|ę|>", "<|Ě|>", "<|ě|>", "<|Ĝ|>", "<|ĝ|>", "<|Ğ|>", "<|ğ|>", "<|Ġ|>", "<|ġ|>", "<|Ģ|>", "<|ģ|>", "<|Ĥ|>", "<|ĥ|>", "<|Ħ|>", "<|ħ|>", "<|Ĩ|>", "<|ĩ|>", "<|Ī|>", "<|ī|>", "<|Ĭ|>", "<|ĭ|>", "<|Į|>", "<|į|>", "<|İ|>", "<|ı|>", "<|IJ|>", "<|ij|>", "<|Ĵ|>", "<|ĵ|>", "<|Ķ|>", "<|ķ|>", "<|ĸ|>", "<|Ĺ|>", "<|ĺ|>", "<|Ļ|>", "<|ļ|>", "<|Ľ|>", "<|ľ|>", "<|Ŀ|>", "<|ŀ|>", "<|Ł|>", "<|ł|>", "<|Ń|>", "<|Ûķ|>", "<|ÛĮ|>", "<|ا|>", "<|ĠØ|>", "<|ÙĪ|>", "<|ÙĨ|>", "<|ر|>", "<|Ú©|>", "<|ĠÙ|>", "<|ت|>", "<|اÙĨ|>", "<|Ûİ|>", "<|د|>", "<|Ùħ|>", "<|Ġب|>", "<|ÛĨ|>", "<|س|>", "<|translate|>", "<|transcribe|>", "<|startoflm|>", "<|startofprev|>", "<|nocaptions|>", "<|notimestamps|>" ], "bos_token": "<|endoftext|>", "clean_up_tokenization_spaces": true, "cls_token": "[CLS]", "do_basic_tokenize": true, "do_lower_case": true, "eos_token": "<|endoftext|>", "errors": "replace", "mask_token": "[MASK]", "model_max_length": 12, "never_split": null, "pad_token": "<|endoftext|>", "processor_class": "WhisperProcessor", "return_attention_mask": false, "sep_token": "[SEP]", "strip_accents": null, "tokenize_chinese_chars": true, "tokenizer_class": "BertTokenizer", "unk_token": "<|endoftext|>" }