ketanmore's picture
Upload folder using huggingface_hub
2720487 verified
raw
history blame
1.68 kB
from transformers import T5Config, MBartConfig, DonutSwinConfig
class MBartMoEConfig(MBartConfig):
    """MBartConfig subclass that adds no fields and overrides nothing.

    It only gives the configuration a distinct class name.
    NOTE(review): "MoE" presumably stands for mixture-of-experts; nothing in
    this class configures that, so confirm against the model code.
    """
class VariableDonutSwinConfig(DonutSwinConfig):
    """DonutSwinConfig subclass that adds no fields and overrides nothing.

    It only gives the configuration a distinct class name.
    NOTE(review): "Variable" presumably refers to variable input sizes;
    nothing in this class establishes that, so confirm against the encoder.
    """
# Model-specific vocabulary layout; the tokenizer needs these values.
TOTAL_TOKENS = 65_536
TOKEN_OFFSET = 3  # one slot each for pad, eos and bos
SPECIAL_TOKENS = 253
# The full vocabulary size is the three counts above added together.
TOTAL_VOCAB_SIZE = sum((TOTAL_TOKENS, TOKEN_OFFSET, SPECIAL_TOKENS))
# Maps each two-letter language code to an integer ID.
# (The codes look like ISO 639-1 -- TODO confirm.)
# A code's ID is its position in the tuple below, so order is significant:
# inserting or reordering entries changes the ID of every later code.
LANGUAGE_MAP = {
    code: index
    for index, code in enumerate(
        (
            "af", "am", "ar", "as", "az", "be", "bg", "bn", "br", "bs",  # 0-9
            "ca", "cs", "cy", "da", "de", "el", "en", "eo", "es", "et",  # 10-19
            "eu", "fa", "fi", "fr", "fy", "ga", "gd", "gl", "gu", "ha",  # 20-29
            "he", "hi", "hr", "hu", "hy", "id", "is", "it", "ja", "jv",  # 30-39
            "ka", "kk", "km", "kn", "ko", "ku", "ky", "la", "lo", "lt",  # 40-49
            "lv", "mg", "mk", "ml", "mn", "mr", "ms", "my", "ne", "nl",  # 50-59
            "no", "om", "or", "pa", "pl", "ps", "pt", "ro", "ru", "sa",  # 60-69
            "sd", "si", "sk", "sl", "so", "sq", "sr", "su", "sv", "sw",  # 70-79
            "ta", "te", "th", "tl", "tr", "ug", "uk", "ur", "uz", "vi",  # 80-89
            "xh", "yi", "zh",  # 90-92
        )
    )
}