# PL-BERT (Japanese) pre-training configuration.
log_dir: "Checkpoint_40b_Cotlet_Lite"
# NOTE(review): quoted on purpose — unquoted `no` is parsed as boolean false by
# YAML 1.1 loaders (PyYAML), but accelerate expects the string "no"/"fp16"/"bf16".
mixed_precision: "no"
data_folder: "/home/ubuntu/001_PLBERT_JA/40B_dataset_W_fixed"
batch_size: 26
save_interval: 10000  # steps between checkpoint saves
log_interval: 100  # steps between log lines
num_process: 1  # number of GPUs
num_steps: 1000000  # total training steps

dataset_params:
  tokenizer: ""
  token_separator: " "  # token used for phoneme separator (space)
  token_mask: "M"  # token used for phoneme mask (M)
  word_separator: 2  # token id used for the word separator
  token_maps: "/home/ubuntu/001_PLBERT_JA/token_maps_jp_200k_0.pkl"  # token map path
  max_mel_length: 512  # max phoneme length
  word_mask_prob: 0.15  # probability to mask the entire word
  phoneme_mask_prob: 0.1  # probability to mask each phoneme
  replace_prob: 0.2  # probability to replace phonemes

model_params:
  vocab_size: 178
  hidden_size: 768
  num_attention_heads: 12
  intermediate_size: 2048
  max_position_embeddings: 512
  num_hidden_layers: 12
  dropout: 0.1