hogru's picture
Update tokenizer, bump hf versions
92eb358
{
"version": "1.0",
"truncation": null,
"padding": {
"strategy": "BatchLongest",
"direction": "Right",
"pad_to_multiple_of": null,
"pad_id": 2,
"pad_type_id": 0,
"pad_token": " "
},
"added_tokens": [
{
"id": 0,
"content": "^",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "_",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": " ",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "§",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "°",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": null,
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "^",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "_",
"type_id": 0
}
}
],
"pair": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {
"^": {
"id": "^",
"ids": [
0
],
"tokens": [
"^"
]
},
"_": {
"id": "_",
"ids": [
1
],
"tokens": [
"_"
]
}
}
},
"decoder": {
"type": "WordPiece",
"prefix": "##",
"cleanup": false
},
"model": {
"type": "WordPiece",
"unk_token": "§",
"continuing_subword_prefix": "##",
"max_input_chars_per_word": 1024,
"vocab": {
"^": 0,
"_": 1,
" ": 2,
"§": 3,
"°": 4,
"#": 5,
"%": 6,
"(": 7,
")": 8,
"+": 9,
"-": 10,
"0": 11,
"1": 12,
"2": 13,
"3": 14,
"4": 15,
"5": 16,
"6": 17,
"7": 18,
"8": 19,
"9": 20,
"=": 21,
"B": 22,
"C": 23,
"F": 24,
"H": 25,
"I": 26,
"N": 27,
"O": 28,
"P": 29,
"S": 30,
"[": 31,
"]": 32,
"b": 33,
"c": 34,
"e": 35,
"i": 36,
"l": 37,
"n": 38,
"o": 39,
"p": 40,
"r": 41,
"s": 42,
"##c": 43,
"##1": 44,
"##(": 45,
"##C": 46,
"##=": 47,
"##2": 48,
"##O": 49,
"##)": 50,
"##n": 51,
"##-": 52,
"##l": 53,
"##N": 54,
"##S": 55,
"##3": 56,
"##[": 57,
"##H": 58,
"##]": 59,
"##+": 60,
"##o": 61,
"##F": 62,
"##4": 63,
"##B": 64,
"###": 65,
"##r": 66,
"##I": 67,
"##s": 68,
"##5": 69,
"##P": 70,
"##6": 71,
"##e": 72,
"##7": 73,
"##i": 74,
"##8": 75,
"##9": 76,
"##p": 77,
"##%": 78,
"##0": 79,
"##b": 80,
"##cc": 81,
"##CC": 82,
"##O)": 83,
"##C(": 84,
"##c1": 85,
"##=O)": 86,
"##c(": 87,
"##c2": 88,
"##C)": 89,
"##c1cc": 90,
"##C(=O)": 91,
"##c3": 92,
"##c2cc": 93,
"##)cc": 94,
"##(=O)": 95,
"##C1": 96,
"##c1ccc(": 97,
"##(C)": 98,
"##c3cc": 99,
"##2)": 100,
"##F)": 101,
"##CCCC": 102,
"##C(=O)N": 103,
"##c1cccc": 104,
"##C2": 105,
"##c2cccc": 106,
"##CN": 107,
"##Cl": 108,
"##C(C)": 109,
"##cn": 110,
"##)cc1": 111,
"##c4": 112,
"##CCN": 113,
"##3)": 114,
"CO": 115,
"##=C(": 116,
"##n1": 117,
"##c2ccccc2": 118,
"##nc(": 119,
"##c2ccc(": 120,
"##c1ccccc1": 121,
"##CC1": 122,
"##CO": 123,
"##c3cccc": 124,
"##[n": 125,
"##NC(=O)": 126,
"##N)": 127,
"##H]": 128,
"##[nH]": 129,
"##S(=O)": 130,
"##(C": 131,
"##S(=O)(=O)": 132,
"##Cl)": 133,
"O=C(": 134,
"##(F)": 135,
"##c3ccccc3": 136,
"##OC)": 137,
"##CC(=O)": 138,
"##C3": 139,
"##cccc": 140,
"##CC(": 141,
"##n2": 142,
"##c1cc(": 143,
"##c2c(": 144,
"##c1n": 145,
"##c3ccc(": 146,
"##cc1": 147,
"##CCN(": 148,
"##N1": 149,
"##CCO": 150,
"##C(F)": 151,
"##CC2": 152,
"##F)cc": 153,
"##c2)": 154,
"##cc(": 155,
"##c2ccccc2)": 156,
"##c2n": 157,
"##C(F)(F)": 158,
"##C(O)": 159,
"##Cl)cc": 160,
"##C=": 161,
"##c(=O)": 162,
"##+]": 163,
"##c1cccc(": 164,
"##4)": 165,
"##OC": 166,
"COc1ccc(": 167,
"##c1)": 168,
"##=N": 169,
"##C(C": 170,
"##=O": 171,
"##c2cc(": 172,
"##N2": 173,
"##CCCN": 174,
"##c(-": 175,
"##-]": 176,
"CC(C)": 177,
"##[N": 178,
"##O-]": 179,
"##[O-]": 180
}
}
}