{
"num_threads": 224,
"split_by_whitespace": true,
"model_type": "unigram",
"vocab_size": 250680,
"character_coverage": 0.9999,
"byte_fallback": true,
"split_by_number": true,
"split_digits": true,
"normalization_rule_name": "nfkc",
"max_sentence_length": 4096,
"shuffle_input_sentence": true,
"input_sentence_size": 0,
"train_extremely_large_corpus": true,
"allow_whitespace_only_pieces": true,
"required_chars": "",
"remove_extra_whitespaces": false,
"user_defined_symbols": [
"<s>",
"</s>",
"<pad>",
"<eod>",
"<placeholder_tok_0>",
"<placeholder_tok_1>",
"<placeholder_tok_2>",
"<placeholder_tok_3>",
"<placeholder_tok_4>",
"<placeholder_tok_5>",
"<placeholder_tok_6>",
"<placeholder_tok_7>",
"<placeholder_tok_8>",
"<placeholder_tok_9>",
"<placeholder_tok_10>",
"<placeholder_tok_11>",
"<placeholder_tok_12>",
"<placeholder_tok_13>",
"<placeholder_tok_14>",
"<placeholder_tok_15>",
"<placeholder_tok_16>",
"<placeholder_tok_17>",
"<placeholder_tok_18>",
"<placeholder_tok_19>",
"<placeholder_tok_20>",
"<placeholder_tok_21>",
"<placeholder_tok_22>",
"<placeholder_tok_23>",
"<placeholder_tok_24>",
"<placeholder_tok_25>",
"<placeholder_tok_26>",
"<placeholder_tok_27>",
"<placeholder_tok_28>",
"<placeholder_tok_29>",
"<placeholder_tok_30>",
"<placeholder_tok_31>",
"<placeholder_tok_32>",
"<placeholder_tok_33>",
"<placeholder_tok_34>",
"<placeholder_tok_35>",
"<placeholder_tok_36>",
"<placeholder_tok_37>",
"<placeholder_tok_38>",
"<placeholder_tok_39>",
"<placeholder_tok_40>",
"<placeholder_tok_41>",
"<placeholder_tok_42>",
"<placeholder_tok_43>",
"<placeholder_tok_44>",
"<placeholder_tok_45>",
"<placeholder_tok_46>",
"<placeholder_tok_47>",
"<placeholder_tok_48>",
"<placeholder_tok_49>",
"<placeholder_tok_50>",
"<placeholder_tok_51>",
"<placeholder_tok_52>",
"<placeholder_tok_53>",
"<placeholder_tok_54>",
"<placeholder_tok_55>",
"<placeholder_tok_56>",
"<placeholder_tok_57>",
"<placeholder_tok_58>",
"<placeholder_tok_59>",
"<placeholder_tok_60>",
"<placeholder_tok_61>",
"<placeholder_tok_62>",
"<placeholder_tok_63>",
"<placeholder_tok_64>",
"<placeholder_tok_65>",
"<placeholder_tok_66>",
"<placeholder_tok_67>",
"<placeholder_tok_68>",
"<placeholder_tok_69>",
"<placeholder_tok_70>",
"<placeholder_tok_71>",
"<placeholder_tok_72>",
"<placeholder_tok_73>",
"<placeholder_tok_74>",
"<placeholder_tok_75>",
"<placeholder_tok_76>",
"<placeholder_tok_77>",
"<placeholder_tok_78>",
"<placeholder_tok_79>",
"<placeholder_tok_80>",
"<placeholder_tok_81>",
"<placeholder_tok_82>",
"<placeholder_tok_83>",
"<placeholder_tok_84>",
"<placeholder_tok_85>",
"<placeholder_tok_86>",
"<placeholder_tok_87>",
"<placeholder_tok_88>",
"<placeholder_tok_89>",
"<placeholder_tok_90>",
"<placeholder_tok_91>",
"<placeholder_tok_92>",
"<placeholder_tok_93>",
"<placeholder_tok_94>",
"<placeholder_tok_95>",
"<placeholder_tok_96>",
"<placeholder_tok_97>",
"<placeholder_tok_98>",
"<placeholder_tok_99>",
"<placeholder_tok_100>",
"<placeholder_tok_101>",
"<placeholder_tok_102>",
"<placeholder_tok_103>",
"<placeholder_tok_104>",
"<placeholder_tok_105>",
"<placeholder_tok_106>",
"<placeholder_tok_107>",
"<placeholder_tok_108>",
"<placeholder_tok_109>",
"<placeholder_tok_110>",
"<placeholder_tok_111>",
"<placeholder_tok_112>",
"<placeholder_tok_113>",
"<placeholder_tok_114>",
"<placeholder_tok_115>",
"<placeholder_tok_116>",
"<placeholder_tok_117>",
"<placeholder_tok_118>",
"<placeholder_tok_119>",
"<placeholder_tok_120>",
"<placeholder_tok_121>",
"<placeholder_tok_122>",
"<placeholder_tok_123>",
"<placeholder_tok_124>",
"<placeholder_tok_125>",
"<placeholder_tok_126>",
"<placeholder_tok_127>",
"<placeholder_tok_128>",
"<placeholder_tok_129>",
"<placeholder_tok_130>",
"<placeholder_tok_131>",
"<placeholder_tok_132>",
"<placeholder_tok_133>",
"<placeholder_tok_134>",
"<placeholder_tok_135>",
"<placeholder_tok_136>",
"<placeholder_tok_137>",
"<placeholder_tok_138>",
"<placeholder_tok_139>",
"<placeholder_tok_140>",
"<placeholder_tok_141>",
"<placeholder_tok_142>",
"<placeholder_tok_143>",
"<placeholder_tok_144>",
"<placeholder_tok_145>",
"<placeholder_tok_146>",
"<placeholder_tok_147>",
"<placeholder_tok_148>",
"<placeholder_tok_149>",
"<placeholder_tok_150>",
"<placeholder_tok_151>",
"<placeholder_tok_152>",
"<placeholder_tok_153>",
"<placeholder_tok_154>",
"<placeholder_tok_155>",
"<placeholder_tok_156>",
"<placeholder_tok_157>",
"<placeholder_tok_158>",
"<placeholder_tok_159>",
"<placeholder_tok_160>",
"<placeholder_tok_161>",
"<placeholder_tok_162>",
"<placeholder_tok_163>",
"<placeholder_tok_164>",
"<placeholder_tok_165>",
"<placeholder_tok_166>",
"<placeholder_tok_167>",
"<placeholder_tok_168>",
"<placeholder_tok_169>",
"<placeholder_tok_170>",
"<placeholder_tok_171>",
"<placeholder_tok_172>",
"<placeholder_tok_173>",
"<placeholder_tok_174>",
"<placeholder_tok_175>",
"<placeholder_tok_176>",
"<placeholder_tok_177>",
"<placeholder_tok_178>",
"<placeholder_tok_179>",
"<placeholder_tok_180>",
"<placeholder_tok_181>",
"<placeholder_tok_182>",
"<placeholder_tok_183>",
"<placeholder_tok_184>",
"<placeholder_tok_185>",
"<placeholder_tok_186>",
"<placeholder_tok_187>",
"<placeholder_tok_188>",
"<placeholder_tok_189>",
"<placeholder_tok_190>",
"<placeholder_tok_191>",
"<placeholder_tok_192>",
"<placeholder_tok_193>",
"<placeholder_tok_194>",
"<placeholder_tok_195>",
"<placeholder_tok_196>",
"<placeholder_tok_197>",
"<placeholder_tok_198>",
"<placeholder_tok_199>",
"<placeholder_tok_200>",
"<placeholder_tok_201>",
"<placeholder_tok_202>",
"<placeholder_tok_203>",
"<placeholder_tok_204>",
"<placeholder_tok_205>",
"<placeholder_tok_206>",
"<placeholder_tok_207>",
"<placeholder_tok_208>",
"<placeholder_tok_209>",
"<placeholder_tok_210>",
"<placeholder_tok_211>",
"<placeholder_tok_212>",
"<placeholder_tok_213>",
"<placeholder_tok_214>",
"<placeholder_tok_215>",
"<placeholder_tok_216>",
"<placeholder_tok_217>",
"<placeholder_tok_218>",
"<placeholder_tok_219>",
"<placeholder_tok_220>",
"<placeholder_tok_221>",
"<placeholder_tok_222>",
"<placeholder_tok_223>",
"<placeholder_tok_224>",
"<placeholder_tok_225>",
"<placeholder_tok_226>",
"<placeholder_tok_227>",
"<placeholder_tok_228>",
"<placeholder_tok_229>",
"<placeholder_tok_230>",
"<placeholder_tok_231>",
"<placeholder_tok_232>",
"<placeholder_tok_233>",
"<placeholder_tok_234>",
"<placeholder_tok_235>",
"<placeholder_tok_236>",
"<placeholder_tok_237>",
"<placeholder_tok_238>",
"<placeholder_tok_239>",
"<placeholder_tok_240>",
"<placeholder_tok_241>",
"<placeholder_tok_242>",
"<placeholder_tok_243>",
"<placeholder_tok_244>",
"<placeholder_tok_245>",
"<placeholder_tok_246>",
"<placeholder_tok_247>",
"<placeholder_tok_248>",
"<placeholder_tok_249>",
"<placeholder_tok_250>",
"<placeholder_tok_251>",
"<placeholder_tok_252>",
"<placeholder_tok_253>",
"<placeholder_tok_254>",
"<placeholder_tok_255>"
],
"datasets_dir": "/home/fhgiais/gptx_ablations/bias_analysis/data/tokenizer/temp/",
"save_dir": "/home/fhgiais/gptx_ablations/bias_analysis/tokenizer/24",
"text_key": "text",
"cache_dir": "/home/fhgiais/gptx_ablations/bias_analysis/tokenizer/24/cache",
"library": "sentencepiece",
"auto_map": {
"AutoTokenizer": [
"gptx_tokenizer.SPTokenizer",
null
]
},
"tokenizer_class": "SPTokenizer"
}