{
  "custom_vocab_files": [
    "/home/usuaris/veu/casimiro.pio.carrino/projects/corpus-utils-lm/corpora/bio/biomedical-clinical.txt"
  ],
  "vocab_name": "bio-biomedical-clinical-vocab-52k",
  "tokenizer": "bbpe",
  "lowercase": false,
  "vocab_size": 52000,
  "min_frequency": 10,
  "extra_tokens": [],
  "limit_alphabet": 1000,
  "no_show_progress": false,
  "strip_accents": false,
  "no_handle_chinese_chars": false,
  "no_clean_text": false,
  "reserve_tokens": 0,
  "use_tokenizers": false,
  "no_fairseq": false,
  "files": [
    "/home/usuaris/veu/casimiro.pio.carrino/projects/corpus-utils-lm/corpora/bio/biomedical-clinical.txt"
  ],
  "output_root_path": "/home/usuaris/veu/casimiro.pio.carrino/projects/corpus-utils-lm/output/model-ready_output/bio-biomedical-clinical-vocab-52k-2021-04-26-0955-3a71-240f",
  "commit_hash": "3a7116cf776527c411869becbe6fad8b9e3f5e56"
}