---
# WordPiece tokenizer preprocessor config (Hezar-style; weights pulled from
# the `pretrained_path` hub repo below).
name: wordpiece_tokenizer
config_type: preprocessor
pretrained_path: hezar-ai/bert-base-fa

# Sequence-length handling
max_length: 512
truncation_strategy: longest_first
truncation_direction: right
stride: 0  # overlap between overflow chunks when truncating; 0 = no overlap

# Padding behavior
padding_strategy: longest
padding_direction: right
pad_to_multiple_of: 0  # 0 disables rounding padded length up to a multiple
pad_token_id: 0
pad_token: '[PAD]'
pad_token_type_id: 0

# Special tokens — quoted because '[' would otherwise start a flow sequence
unk_token: '[UNK]'
special_tokens:
  - '[UNK]'
  - '[SEP]'
  - '[CLS]'
  - '[PAD]'
  - '[MASK]'
wordpieces_prefix: '##'  # prefix marking non-initial sub-word pieces

# Settings used only when training a tokenizer from scratch
train_config:
  name: wordpiece_tokenizer
  config_type: preprocessor
  vocab_size: 30000
  min_frequency: 2  # drop tokens seen fewer than this many times
  limit_alphabet: 1000
  initial_alphabet: []
  show_progress: true