from transformers import T5Config, MBartConfig, DonutSwinConfig | |
class MBartMoEConfig(MBartConfig):
    """Configuration type that subclasses ``MBartConfig`` without changes.

    No attributes or methods are added or overridden; the class only gives
    this config its own distinct type name.

    NOTE(review): the name suggests it configures a mixture-of-experts MBart
    variant defined elsewhere -- confirm against the model code.
    """
class VariableDonutSwinConfig(DonutSwinConfig):
    """Configuration type that subclasses ``DonutSwinConfig`` without changes.

    No attributes or methods are added or overridden; the class only gives
    this config its own distinct type name.

    NOTE(review): the name suggests it pairs with a variable-input-size Donut
    Swin encoder defined elsewhere -- confirm against the model code.
    """
# Config specific to the model, needed for the tokenizer.
# NOTE(review): these values presumably have to match the trained checkpoint;
# confirm before changing any of them.
TOTAL_TOKENS = 65536
TOKEN_OFFSET = 3  # Pad, eos, bos
SPECIAL_TOKENS = 253
# Overall vocabulary size: the sum of the three counts above (65792).
TOTAL_VOCAB_SIZE = TOTAL_TOKENS + TOKEN_OFFSET + SPECIAL_TOKENS
# Maps two-letter language codes to consecutive integer ids (0 through 92),
# numbered in the alphabetical order of the codes.
# NOTE(review): the codes look like ISO 639-1 and the ids are presumably baked
# into the trained model/tokenizer -- confirm before renumbering or inserting
# entries anywhere but the end.
LANGUAGE_MAP = {
    'af': 0,
    'am': 1,
    'ar': 2,
    'as': 3,
    'az': 4,
    'be': 5,
    'bg': 6,
    'bn': 7,
    'br': 8,
    'bs': 9,
    'ca': 10,
    'cs': 11,
    'cy': 12,
    'da': 13,
    'de': 14,
    'el': 15,
    'en': 16,
    'eo': 17,
    'es': 18,
    'et': 19,
    'eu': 20,
    'fa': 21,
    'fi': 22,
    'fr': 23,
    'fy': 24,
    'ga': 25,
    'gd': 26,
    'gl': 27,
    'gu': 28,
    'ha': 29,
    'he': 30,
    'hi': 31,
    'hr': 32,
    'hu': 33,
    'hy': 34,
    'id': 35,
    'is': 36,
    'it': 37,
    'ja': 38,
    'jv': 39,
    'ka': 40,
    'kk': 41,
    'km': 42,
    'kn': 43,
    'ko': 44,
    'ku': 45,
    'ky': 46,
    'la': 47,
    'lo': 48,
    'lt': 49,
    'lv': 50,
    'mg': 51,
    'mk': 52,
    'ml': 53,
    'mn': 54,
    'mr': 55,
    'ms': 56,
    'my': 57,
    'ne': 58,
    'nl': 59,
    'no': 60,
    'om': 61,
    'or': 62,
    'pa': 63,
    'pl': 64,
    'ps': 65,
    'pt': 66,
    'ro': 67,
    'ru': 68,
    'sa': 69,
    'sd': 70,
    'si': 71,
    'sk': 72,
    'sl': 73,
    'so': 74,
    'sq': 75,
    'sr': 76,
    'su': 77,
    'sv': 78,
    'sw': 79,
    'ta': 80,
    'te': 81,
    'th': 82,
    'tl': 83,
    'tr': 84,
    'ug': 85,
    'uk': 86,
    'ur': 87,
    'uz': 88,
    'vi': 89,
    'xh': 90,
    'yi': 91,
    'zh': 92,
}