baseline_universal_phone_120k / tokenizer_config.json

Training in progress, step 5000

9e0fd25 about 1 year ago

8.06 kB

	{
	"added_tokens_decoder": {
	"0": {
	"content": "<s>",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": true
	},
	"1": {
	"content": "<pad>",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": true
	},
	"2": {
	"content": "</s>",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": true
	},
	"3": {
	"content": "<unk>",
	"lstrip": true,
	"normalized": false,
	"rstrip": true,
	"single_word": false,
	"special": true
	},
	"79": {
	"content": "<mask>",
	"lstrip": true,
	"normalized": true,
	"rstrip": false,
	"single_word": false,
	"special": true
	},
	"80": {
	"content": "<ctc_blank>",
	"lstrip": false,
	"normalized": true,
	"rstrip": false,
	"single_word": false,
	"special": false
	},
	"81": {
	"content": "ʈ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"82": {
	"content": "ʯ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"83": {
	"content": "ʃ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"84": {
	"content": "ʂ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"85": {
	"content": "ɑ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"86": {
	"content": "ʊ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"87": {
	"content": "ɞ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"88": {
	"content": "θ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"89": {
	"content": "ɗ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"90": {
	"content": "γ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"91": {
	"content": "ɝ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"92": {
	"content": "β",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"93": {
	"content": "ɱ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"94": {
	"content": "ʉ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"95": {
	"content": "ŋ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"96": {
	"content": "ɖ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"97": {
	"content": "ϳ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"98": {
	"content": "ο",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"99": {
	"content": "ɨ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"100": {
	"content": "π",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"101": {
	"content": "ʀ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"102": {
	"content": "ɬ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"103": {
	"content": "ɲ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"104": {
	"content": "ɦ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"105": {
	"content": "ɮ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"106": {
	"content": "Α",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"107": {
	"content": "ɛ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"108": {
	"content": "ʏ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"109": {
	"content": "Ι",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"110": {
	"content": "ɫ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"111": {
	"content": "ʔ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"112": {
	"content": "ϝ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"113": {
	"content": "ʧ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"114": {
	"content": "ɕ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"115": {
	"content": "ʎ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"116": {
	"content": "ρ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"117": {
	"content": "ɳ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"118": {
	"content": "Ο",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"119": {
	"content": "ʋ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"120": {
	"content": "κ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	},
	"121": {
	"content": "ɴ",
	"lstrip": true,
	"normalized": true,
	"rstrip": true,
	"single_word": false,
	"special": false
	}
	},
	"bos_token": "<s>",
	"clean_up_tokenization_spaces": true,
	"eos_token": "</s>",
	"mask_token": "<mask>",
	"model_max_length": 600,
	"normalize": false,
	"pad_token": "<pad>",
	"processor_class": "SpeechT5Processor",
	"sp_model_kwargs": {},
	"tokenizer_class": "SpeechT5Tokenizer",
	"unk_token": "<unk>"
	}