Pleias-Pico / tokenizer_config.json

Upload folder using huggingface_hub

3fc3cd7 verified 23 days ago

6.41 kB

	{
	"added_tokens_decoder": {
	"0": {
	"content": "[UNK]",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"1": {
	"content": "<\|begin_of_text\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"2": {
	"content": "<\|end_of_text\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"3": {
	"content": "[PAD]",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65510": {
	"content": "<\|tool_call_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65511": {
	"content": "<\|tool_call_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65512": {
	"content": "<\|tool_list_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65513": {
	"content": "<\|tool_list_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65514": {
	"content": "<\|source_analysis_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65515": {
	"content": "<\|source_analysis_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65516": {
	"content": "<\|source_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65517": {
	"content": "<\|source_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65518": {
	"content": "<\|im_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65519": {
	"content": "<\|im_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65520": {
	"content": "<\|answer_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65521": {
	"content": "<\|answer_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65522": {
	"content": "<\|text_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65523": {
	"content": "<\|text_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65524": {
	"content": "<\|translation_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65525": {
	"content": "<\|translation_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65526": {
	"content": "<\|back_translation_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65527": {
	"content": "<\|back_translation_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65528": {
	"content": "<\|ocr_correction_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65529": {
	"content": "<\|ocr_correction_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65530": {
	"content": "<\|json_scheme_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65531": {
	"content": "<\|json_scheme_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65532": {
	"content": "<\|source_interpretation_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65533": {
	"content": "<\|source_interpretation_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65534": {
	"content": "<\|query_start\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"65535": {
	"content": "<\|query_end\|>",
	"lstrip": false,
	"normalized": false,
	"rstrip": false,
	"single_word": false,
	"special": true
	}
	},
	"additional_special_tokens": [
	"<\|tool_call_start\|>",
	"<\|tool_call_end\|>",
	"<\|tool_list_start\|>",
	"<\|tool_list_end\|>",
	"<\|source_analysis_start\|>",
	"<\|source_analysis_end\|>",
	"<\|source_start\|>",
	"<\|source_end\|>",
	"<\|im_start\|>",
	"<\|im_end\|>",
	"<\|answer_start\|>",
	"<\|answer_end\|>",
	"<\|text_start\|>",
	"<\|text_end\|>",
	"<\|translation_start\|>",
	"<\|translation_end\|>",
	"<\|back_translation_start\|>",
	"<\|back_translation_end\|>",
	"<\|ocr_correction_start\|>",
	"<\|ocr_correction_end\|>",
	"<\|json_scheme_start\|>",
	"<\|json_scheme_end\|>",
	"<\|source_interpretation_start\|>",
	"<\|source_interpretation_end\|>",
	"<\|query_start\|>",
	"<\|query_end\|>"
	],
	"clean_up_tokenization_spaces": true,
	"model_max_length": 1000000000000000019884624838656,
	"tokenizer_class": "PreTrainedTokenizerFast"
	}