# Top-level training run settings.
log_dir: "Checkpoint_40b_Cotlet_Lite"
# Quoted on purpose: YAML 1.1 loaders (e.g. PyYAML) read a bare `no` as boolean false.
# NOTE(review): assumes the consumer expects the string "no" - confirm.
mixed_precision: "no"
data_folder: "/home/ubuntu/001_PLBERT_JA/40B_dataset_W_fixed"
batch_size: 26
save_interval: 10000
log_interval: 100
num_process: 1  # number of GPUs
num_steps: 1000000
# Dataset and masking settings; the keys below are nested under dataset_params.
dataset_params:
  # NOTE(review): tokenizer is an empty string - confirm the consumer accepts this.
  tokenizer: ""
  token_separator: " "  # token used for phoneme separator (space)
  token_mask: "M"  # token used for phoneme mask (M)
  word_separator: 2  # token used for word separator (<formula>)
  token_maps: "/home/ubuntu/001_PLBERT_JA/token_maps_jp_200k_0.pkl"  # token map path
  max_mel_length: 512  # max phoneme length
  word_mask_prob: 0.15  # probability to mask the entire word
  phoneme_mask_prob: 0.1  # probability to mask each phoneme
  replace_prob: 0.2  # probability to replace phonemes
# Model hyperparameters; the keys below are nested under model_params.
model_params:
  vocab_size: 178
  hidden_size: 768
  num_attention_heads: 12
  intermediate_size: 2048
  max_position_embeddings: 512
  num_hidden_layers: 12
  dropout: 0.1